From 3d62000adcec9b6e4a2d7ca89020f3bf68ece2ef Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 23 Aug 2017 18:36:46 -0700 Subject: [PATCH] archive/tar: return better WriteHeader errors WriteHeader may fail to encode a header for any number of reasons, which can be frustrating for the user when trying to create a tar archive. As we validate the Header, we generate an informative error message intended for human consumption and return that if and only if no format can be selected. This allows WriteHeader to return informative errors like: tar: cannot encode header: invalid PAX record: "linkpath = \x00hello" tar: cannot encode header: invalid PAX record: "SCHILY.xattr.foo=bar = baz" tar: cannot encode header: Format specifies GNU; and only PAX supports Xattrs tar: cannot encode header: Format specifies GNU; and GNU cannot encode ModTime=1969-12-31 15:59:59.0000005 -0800 PST tar: cannot encode header: Format specifies GNU; and GNU supports sparse files only with TypeGNUSparse tar: cannot encode header: Format specifies USTAR; and USTAR cannot encode ModTime=292277026596-12-04 07:30:07 -0800 PST tar: cannot encode header: Format specifies USTAR; and USTAR does not support sparse files tar: cannot encode header: Format specifies PAX; and only GNU supports TypeGNUSparse Updates #18710 Change-Id: I82a498d6f29d02c4e73bce47b768eb578da8499c Reviewed-on: https://go-review.googlesource.com/58310 Run-TryBot: Joe Tsai Reviewed-by: Ian Lance Taylor --- src/archive/tar/common.go | 103 +++++++++++++++++++++++++-------- src/archive/tar/tar_test.go | 43 +++++++++++++- src/archive/tar/writer.go | 5 +- src/archive/tar/writer_test.go | 24 +++++--- 4 files changed, 137 insertions(+), 38 deletions(-) diff --git a/src/archive/tar/common.go b/src/archive/tar/common.go index 89d3b099b1..e9a3499a64 100644 --- a/src/archive/tar/common.go +++ b/src/archive/tar/common.go @@ -14,6 +14,7 @@ import ( "os" "path" "strconv" + "strings" "time" ) @@ -31,6 +32,22 @@ var ( errWriteHole = errors.New("tar: write non-NUL byte in sparse hole") ) +type headerError []string + +func (he headerError) Error() string { + const prefix = "tar: cannot encode header" + var ss []string + for _, s := range he { + if s != "" { + ss = append(ss, s) + } + } + if len(ss) == 0 { + return prefix + } + return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and ")) +} + // Header type flags. const ( TypeReg = '0' // regular file @@ -215,62 +232,73 @@ func (h *Header) FileInfo() os.FileInfo { return headerFileInfo{h} } -// allowedFormats determines which formats can be used. The value returned -// is the logical OR of multiple possible formats. If the value is -// FormatUnknown, then the input Header cannot be encoded. +// allowedFormats determines which formats can be used. +// The value returned is the logical OR of multiple possible formats. +// If the value is FormatUnknown, then the input Header cannot be encoded +// and an error is returned explaining why. // // As a by-product of checking the fields, this function returns paxHdrs, which // contain all fields that could not be directly encoded. -func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string) { +func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err error) { format = FormatUSTAR | FormatPAX | FormatGNU paxHdrs = make(map[string]string) - verifyString := func(s string, size int, paxKey string) { + var whyNoUSTAR, whyNoPAX, whyNoGNU string + verifyString := func(s string, size int, name, paxKey string) { // NUL-terminator is optional for path and linkpath. // Technically, it is required for uname and gname, // but neither GNU nor BSD tar checks for it. tooLong := len(s) > size allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath if hasNUL(s) || (tooLong && !allowLongGNU) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%q", name, s) format.mustNotBe(FormatGNU) } if !isASCII(s) || tooLong { canSplitUSTAR := paxKey == paxPath if _, _, ok := splitUSTARPath(s); !canSplitUSTAR || !ok { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%q", name, s) format.mustNotBe(FormatUSTAR) } if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%q", name, s) format.mustNotBe(FormatPAX) } else { paxHdrs[paxKey] = s } } } - verifyNumeric := func(n int64, size int, paxKey string) { + verifyNumeric := func(n int64, size int, name, paxKey string) { if !fitsInBase256(size, n) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%d", name, n) format.mustNotBe(FormatGNU) } if !fitsInOctal(size, n) { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%d", name, n) format.mustNotBe(FormatUSTAR) if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%d", name, n) format.mustNotBe(FormatPAX) } else { paxHdrs[paxKey] = strconv.FormatInt(n, 10) } } } - verifyTime := func(ts time.Time, size int, paxKey string) { + verifyTime := func(ts time.Time, size int, name, paxKey string) { if ts.IsZero() { return // Always okay } needsNano := ts.Nanosecond() != 0 hasFieldUSTAR := paxKey == paxMtime if !fitsInBase256(size, ts.Unix()) || needsNano { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts) format.mustNotBe(FormatGNU) } if !fitsInOctal(size, ts.Unix()) || needsNano || !hasFieldUSTAR { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts) format.mustNotBe(FormatUSTAR) if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts) format.mustNotBe(FormatPAX) } else { paxHdrs[paxKey] = formatPAXTime(ts) @@ -278,61 +306,86 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string) { } } + // Check basic fields. var blk block v7 := blk.V7() ustar := blk.USTAR() gnu := blk.GNU() - verifyString(h.Name, len(v7.Name()), paxPath) - verifyString(h.Linkname, len(v7.LinkName()), paxLinkpath) - verifyString(h.Uname, len(ustar.UserName()), paxUname) - verifyString(h.Gname, len(ustar.GroupName()), paxGname) - verifyNumeric(h.Mode, len(v7.Mode()), paxNone) - verifyNumeric(int64(h.Uid), len(v7.UID()), paxUid) - verifyNumeric(int64(h.Gid), len(v7.GID()), paxGid) - verifyNumeric(h.Size, len(v7.Size()), paxSize) - verifyNumeric(h.Devmajor, len(ustar.DevMajor()), paxNone) - verifyNumeric(h.Devminor, len(ustar.DevMinor()), paxNone) - verifyTime(h.ModTime, len(v7.ModTime()), paxMtime) - verifyTime(h.AccessTime, len(gnu.AccessTime()), paxAtime) - verifyTime(h.ChangeTime, len(gnu.ChangeTime()), paxCtime) + verifyString(h.Name, len(v7.Name()), "Name", paxPath) + verifyString(h.Linkname, len(v7.LinkName()), "Linkname", paxLinkpath) + verifyString(h.Uname, len(ustar.UserName()), "Uname", paxUname) + verifyString(h.Gname, len(ustar.GroupName()), "Gname", paxGname) + verifyNumeric(h.Mode, len(v7.Mode()), "Mode", paxNone) + verifyNumeric(int64(h.Uid), len(v7.UID()), "Uid", paxUid) + verifyNumeric(int64(h.Gid), len(v7.GID()), "Gid", paxGid) + verifyNumeric(h.Size, len(v7.Size()), "Size", paxSize) + verifyNumeric(h.Devmajor, len(ustar.DevMajor()), "Devmajor", paxNone) + verifyNumeric(h.Devminor, len(ustar.DevMinor()), "Devminor", paxNone) + verifyTime(h.ModTime, len(v7.ModTime()), "ModTime", paxMtime) + verifyTime(h.AccessTime, len(gnu.AccessTime()), "AccessTime", paxAtime) + verifyTime(h.ChangeTime, len(gnu.ChangeTime()), "ChangeTime", paxCtime) + // Check for header-only types. + var whyOnlyPAX, whyOnlyGNU string if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 { - return FormatUnknown, nil + return FormatUnknown, nil, headerError{"negative size on header-only type"} } + + // Check PAX records. if len(h.Xattrs) > 0 { for k, v := range h.Xattrs { paxHdrs[paxXattr+k] = v } + whyOnlyPAX = "only PAX supports Xattrs" format.mayOnlyBe(FormatPAX) } for k, v := range paxHdrs { // Forbid empty values (which represent deletion) since usage of // them are non-sensible without global PAX record support. if !validPAXRecord(k, v) || v == "" { - return FormatUnknown, nil // Invalid PAX key + return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", k+" = "+v)} } } + + // Check sparse files. if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse { if isHeaderOnlyType(h.Typeflag) { - return FormatUnknown, nil // Cannot have sparse data on header-only file + return FormatUnknown, nil, headerError{"header-only type cannot be sparse"} } if !validateSparseEntries(h.SparseHoles, h.Size) { - return FormatUnknown, nil + return FormatUnknown, nil, headerError{"invalid sparse holes"} } if h.Typeflag == TypeGNUSparse { + whyOnlyGNU = "only GNU supports TypeGNUSparse" format.mayOnlyBe(FormatGNU) } else { + whyNoGNU = "GNU supports sparse files only with TypeGNUSparse" format.mustNotBe(FormatGNU) } + whyNoUSTAR = "USTAR does not support sparse files" format.mustNotBe(FormatUSTAR) } + + // Check desired format. if wantFormat := h.Format; wantFormat != FormatUnknown { if wantFormat.has(FormatPAX) { wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too } format.mayOnlyBe(wantFormat) // Set union of formats allowed and format wanted } - return format, paxHdrs + if format == FormatUnknown { + switch h.Format { + case FormatUSTAR: + err = headerError{"Format specifies USTAR", whyNoUSTAR, whyOnlyPAX, whyOnlyGNU} + case FormatPAX: + err = headerError{"Format specifies PAX", whyNoPAX, whyOnlyGNU} + case FormatGNU: + err = headerError{"Format specifies GNU", whyNoGNU, whyOnlyPAX} + default: + err = headerError{whyNoUSTAR, whyNoPAX, whyNoGNU, whyOnlyPAX, whyOnlyGNU} + } + } + return format, paxHdrs, err } // headerFileInfo implements os.FileInfo. diff --git a/src/archive/tar/tar_test.go b/src/archive/tar/tar_test.go index db83690976..abbf9615e3 100644 --- a/src/archive/tar/tar_test.go +++ b/src/archive/tar/tar_test.go @@ -550,6 +550,10 @@ func TestHeaderAllowedFormats(t *testing.T) { header: &Header{Xattrs: map[string]string{"foo": "bar"}}, paxHdrs: map[string]string{paxXattr + "foo": "bar"}, formats: FormatPAX, + }, { + header: &Header{Xattrs: map[string]string{"foo": "bar"}, Format: FormatGNU}, + paxHdrs: map[string]string{paxXattr + "foo": "bar"}, + formats: FormatUnknown, }, { header: &Header{Xattrs: map[string]string{"用戶名": "\x00hello"}}, paxHdrs: map[string]string{paxXattr + "用戶名": "\x00hello"}, @@ -574,6 +578,10 @@ func TestHeaderAllowedFormats(t *testing.T) { header: &Header{ModTime: time.Unix(math.MaxInt64, 0)}, paxHdrs: map[string]string{paxMtime: "9223372036854775807"}, formats: FormatPAX | FormatGNU, + }, { + header: &Header{ModTime: time.Unix(math.MaxInt64, 0), Format: FormatUSTAR}, + paxHdrs: map[string]string{paxMtime: "9223372036854775807"}, + formats: FormatUnknown, }, { header: &Header{ModTime: time.Unix(-1, 0)}, paxHdrs: map[string]string{paxMtime: "-1"}, @@ -582,6 +590,10 @@ func TestHeaderAllowedFormats(t *testing.T) { header: &Header{ModTime: time.Unix(-1, 500)}, paxHdrs: map[string]string{paxMtime: "-0.9999995"}, formats: FormatPAX, + }, { + header: &Header{ModTime: time.Unix(-1, 500), Format: FormatGNU}, + paxHdrs: map[string]string{paxMtime: "-0.9999995"}, + formats: FormatUnknown, }, { header: &Header{AccessTime: time.Unix(0, 0)}, paxHdrs: map[string]string{paxAtime: "0"}, @@ -594,15 +606,40 @@ func TestHeaderAllowedFormats(t *testing.T) { header: &Header{ChangeTime: time.Unix(123, 456)}, paxHdrs: map[string]string{paxCtime: "123.000000456"}, formats: FormatPAX, + }, { + header: &Header{ChangeTime: time.Unix(123, 456), Format: FormatGNU}, + paxHdrs: map[string]string{paxCtime: "123.000000456"}, + formats: FormatUnknown, + }, { + header: &Header{Name: "sparse.db", Size: 1000, SparseHoles: []SparseEntry{{0, 500}}}, + formats: FormatPAX, + }, { + header: &Header{Name: "sparse.db", Size: 1000, Typeflag: TypeGNUSparse, SparseHoles: []SparseEntry{{0, 500}}}, + formats: FormatGNU, + }, { + header: &Header{Name: "sparse.db", Size: 1000, SparseHoles: []SparseEntry{{0, 500}}, Format: FormatGNU}, + formats: FormatUnknown, + }, { + header: &Header{Name: "sparse.db", Size: 1000, Typeflag: TypeGNUSparse, SparseHoles: []SparseEntry{{0, 500}}, Format: FormatPAX}, + formats: FormatUnknown, + }, { + header: &Header{Name: "sparse.db", Size: 1000, SparseHoles: []SparseEntry{{0, 500}}, Format: FormatUSTAR}, + formats: FormatUnknown, }} for i, v := range vectors { - formats, paxHdrs := v.header.allowedFormats() + formats, paxHdrs, err := v.header.allowedFormats() if formats != v.formats { - t.Errorf("test %d, allowedFormats(...): got %v, want %v", i, formats, v.formats) + t.Errorf("test %d, allowedFormats(): got %v, want %v", i, formats, v.formats) } if formats&FormatPAX > 0 && !reflect.DeepEqual(paxHdrs, v.paxHdrs) && !(len(paxHdrs) == 0 && len(v.paxHdrs) == 0) { - t.Errorf("test %d, allowedFormats(...):\ngot %v\nwant %s", i, paxHdrs, v.paxHdrs) + t.Errorf("test %d, allowedFormats():\ngot %v\nwant %s", i, paxHdrs, v.paxHdrs) + } + if (formats != FormatUnknown) && (err != nil) { + t.Errorf("test %d, unexpected error: %v", i, err) + } + if (formats == FormatUnknown) && (err == nil) { + t.Errorf("test %d, got nil-error, want non-nil error", i) } } } diff --git a/src/archive/tar/writer.go b/src/archive/tar/writer.go index 765c85585d..c04b30ad45 100644 --- a/src/archive/tar/writer.go +++ b/src/archive/tar/writer.go @@ -72,7 +72,8 @@ func (tw *Writer) WriteHeader(hdr *Header) error { } tw.hdr = *hdr // Shallow copy of Header - switch allowedFormats, paxHdrs := tw.hdr.allowedFormats(); { + allowedFormats, paxHdrs, err := tw.hdr.allowedFormats() + switch { case allowedFormats.has(FormatUSTAR): tw.err = tw.writeUSTARHeader(&tw.hdr) return tw.err @@ -83,7 +84,7 @@ func (tw *Writer) WriteHeader(hdr *Header) error { tw.err = tw.writeGNUHeader(&tw.hdr) return tw.err default: - return ErrHeader // Non-fatal error + return err // Non-fatal error } } diff --git a/src/archive/tar/writer_test.go b/src/archive/tar/writer_test.go index e636162b6a..1d62055391 100644 --- a/src/archive/tar/writer_test.go +++ b/src/archive/tar/writer_test.go @@ -222,14 +222,14 @@ func TestWriter(t *testing.T) { Typeflag: TypeReg, Name: "bad-null.txt", Xattrs: map[string]string{"null\x00null\x00": "fizzbuzz"}, - }, ErrHeader}, + }, headerError{}}, }, }, { tests: []testFnc{ testHeader{Header{ Typeflag: TypeReg, Name: "null\x00.txt", - }, ErrHeader}, + }, headerError{}}, }, }, { file: "testdata/gnu-utf8.tar", @@ -376,6 +376,14 @@ func TestWriter(t *testing.T) { }, }} + equalError := func(x, y error) bool { + _, ok1 := x.(headerError) + _, ok2 := y.(headerError) + if ok1 || ok2 { + return ok1 && ok2 + } + return x == y + } for _, v := range vectors { t.Run(path.Base(v.file), func(t *testing.T) { const maxSize = 10 << 10 // 10KiB @@ -386,22 +394,22 @@ func TestWriter(t *testing.T) { switch tf := tf.(type) { case testHeader: err := tw.WriteHeader(&tf.hdr) - if err != tf.wantErr { + if !equalError(err, tf.wantErr) { t.Fatalf("test %d, WriteHeader() = %v, want %v", i, err, tf.wantErr) } case testWrite: got, err := tw.Write([]byte(tf.str)) - if got != tf.wantCnt || err != tf.wantErr { + if got != tf.wantCnt || !equalError(err, tf.wantErr) { t.Fatalf("test %d, Write() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr) } case testFill: got, err := tw.fillZeros(tf.cnt) - if got != tf.wantCnt || err != tf.wantErr { + if got != tf.wantCnt || !equalError(err, tf.wantErr) { t.Fatalf("test %d, fillZeros() = (%d, %v), want (%d, %v)", i, got, err, tf.wantCnt, tf.wantErr) } case testClose: err := tw.Close() - if err != tf.wantErr { + if !equalError(err, tf.wantErr) { t.Fatalf("test %d, Close() = %v, want %v", i, err, tf.wantErr) } default: @@ -740,8 +748,8 @@ func TestWriterErrors(t *testing.T) { t.Run("NegativeSize", func(t *testing.T) { tw := NewWriter(new(bytes.Buffer)) hdr := &Header{Name: "small.txt", Size: -1} - if err := tw.WriteHeader(hdr); err != ErrHeader { - t.Fatalf("WriteHeader() = nil, want %v", ErrHeader) + if err := tw.WriteHeader(hdr); err == nil { + t.Fatalf("WriteHeader() = nil, want non-nil error") } })