diff --git a/src/compress/flate/flate_test.go b/src/compress/flate/flate_test.go index 06d35a066a..3f67025cd7 100644 --- a/src/compress/flate/flate_test.go +++ b/src/compress/flate/flate_test.go @@ -10,23 +10,11 @@ package flate import ( "bytes" + "encoding/hex" "io/ioutil" - "strings" "testing" ) -func TestUncompressedSource(t *testing.T) { - decoder := NewReader(bytes.NewReader([]byte{0x01, 0x01, 0x00, 0xfe, 0xff, 0x11})) - output := make([]byte, 1) - n, error := decoder.Read(output) - if n != 1 || error != nil { - t.Fatalf("decoder.Read() = %d, %v, want 1, nil", n, error) - } - if output[0] != 0x11 { - t.Errorf("output[0] = %x, want 0x11", output[0]) - } -} - // The following test should not panic. func TestIssue5915(t *testing.T) { bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, 5, 5, 6, @@ -90,29 +78,183 @@ func TestInvalidBits(t *testing.T) { } } -func TestDegenerateHuffmanCoding(t *testing.T) { - const ( - want = "abcabc" - // This compressed form has a dynamic Huffman block, even though a - // sensible encoder would use a literal data block, as the latter is - // shorter. Still, it is a valid flate compression of "abcabc". It has - // a degenerate Huffman table with only one coded value: the one - // non-literal back-ref copy of the first "abc" to the second "abc". - // - // To verify that this is decompressible with zlib (the C library), - // it's easy to use the Python wrapper library: - // >>> import zlib - // >>> compressed = "\x0c\xc2...etc...\xff\xff" - // >>> zlib.decompress(compressed, -15) # negative means no GZIP header. - // 'abcabc' - compressed = "\x0c\xc2\x01\x0d\x00\x00\x00\x82\xb0\xac\x4a\xff\x0e\xb0\x7d\x27" + - "\x06\x00\x00\xff\xff" - ) - b, err := ioutil.ReadAll(NewReader(strings.NewReader(compressed))) - if err != nil { - t.Fatal(err) - } - if got := string(b); got != want { - t.Fatalf("got %q, want %q", got, want) +func TestStreams(t *testing.T) { + // To verify any of these hexstrings as valid or invalid flate streams + // according to the C zlib library, you can use the Python wrapper library: + // >>> hex_string = "010100feff11" + // >>> import zlib + // >>> zlib.decompress(hex_string.decode("hex"), -15) # Negative means raw DEFLATE + // '\x11' + + testCases := []struct { + desc string // Description of the stream + stream string // Hexstring of the input DEFLATE stream + want string // Expected result. Use "fail" to expect failure + }{{ + "degenerate HCLenTree", + "05e0010000000000100000000000000000000000000000000000000000000000" + + "00000000000000000004", + "fail", + }, { + "complete HCLenTree, empty HLitTree, empty HDistTree", + "05e0010400000000000000000000000000000000000000000000000000000000" + + "00000000000000000010", + "fail", + }, { + "empty HCLenTree", + "05e0010000000000000000000000000000000000000000000000000000000000" + + "00000000000000000010", + "fail", + }, { + "complete HCLenTree, complete HLitTree, empty HDistTree, use missing HDist symbol", + "000100feff000de0010400000000100000000000000000000000000000000000" + + "0000000000000000000000000000002c", + "fail", + }, { + "complete HCLenTree, complete HLitTree, degenerate HDistTree, use missing HDist symbol", + "000100feff000de0010000000000000000000000000000000000000000000000" + + "00000000000000000610000000004070", + "fail", + }, { + "complete HCLenTree, empty HLitTree, empty HDistTree", + "05e0010400000000100400000000000000000000000000000000000000000000" + + "0000000000000000000000000008", + "fail", + }, { + "complete HCLenTree, empty HLitTree, degenerate HDistTree", + "05e0010400000000100400000000000000000000000000000000000000000000" + + "0000000000000000000800000008", + "fail", + }, { + "complete HCLenTree, degenerate HLitTree, degenerate HDistTree, use missing HLit symbol", + "05e0010400000000100000000000000000000000000000000000000000000000" + + "0000000000000000001c", + "fail", + }, { + "complete HCLenTree, complete HLitTree, too large HDistTree", + "edff870500000000200400000000000000000000000000000000000000000000" + + "000000000000000000080000000000000004", + "fail", + }, { + "complete HCLenTree, complete HLitTree, empty HDistTree, excessive repeater code", + "edfd870500000000200400000000000000000000000000000000000000000000" + + "000000000000000000e8b100", + "fail", + }, { + "complete HCLenTree, complete HLitTree, empty HDistTree of normal length 30", + "05fd01240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" + + "ffffffffffffffffff07000000fe01", + "", + }, { + "complete HCLenTree, complete HLitTree, empty HDistTree of excessive length 31", + "05fe01240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" + + "ffffffffffffffffff07000000fc03", + "fail", + }, { + "complete HCLenTree, over-subscribed HLitTree, empty HDistTree", + "05e001240000000000fcffffffffffffffffffffffffffffffffffffffffffff" + + "ffffffffffffffffff07f00f", + "fail", + }, { + "complete HCLenTree, under-subscribed HLitTree, empty HDistTree", + "05e001240000000000fcffffffffffffffffffffffffffffffffffffffffffff" + + "fffffffffcffffffff07f00f", + "fail", + }, { + "complete HCLenTree, complete HLitTree with single code, empty HDistTree", + "05e001240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" + + "ffffffffffffffffff07f00f", + "01", + }, { + "complete HCLenTree, complete HLitTree with multiple codes, empty HDistTree", + "05e301240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" + + "ffffffffffffffffff07807f", + "01", + }, { + "complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HDist symbol", + "000100feff000de0010400000000100000000000000000000000000000000000" + + "0000000000000000000000000000003c", + "00000000", + }, { + "complete HCLenTree, degenerate HLitTree, degenerate HDistTree", + "05e0010400000000100000000000000000000000000000000000000000000000" + + "0000000000000000000c", + "", + }, { + "complete HCLenTree, degenerate HLitTree, empty HDistTree", + "05e0010400000000100000000000000000000000000000000000000000000000" + + "00000000000000000004", + "", + }, { + "complete HCLenTree, complete HLitTree, empty HDistTree, spanning repeater code", + "edfd870500000000200400000000000000000000000000000000000000000000" + + "000000000000000000e8b000", + "", + }, { + "complete HCLenTree with length codes, complete HLitTree, empty HDistTree", + "ede0010400000000100000000000000000000000000000000000000000000000" + + "0000000000000000000400004000", + "", + }, { + "complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HLit symbol 284 with count 31", + "000100feff00ede0010400000000100000000000000000000000000000000000" + + "000000000000000000000000000000040000407f00", + "0000000000000000000000000000000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000000" + + "0000000000000000000000000000000000000000000000000000000000000000" + + "000000", + }, { + "complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HLit and HDist symbols", + "0cc2010d00000082b0ac4aff0eb07d27060000ffff", + "616263616263", + }, { + "fixed block, use reserved symbol 287", + "33180700", + "fail", + }, { + "raw block", + "010100feff11", + "11", + }, { + "issue 10426 - over-subscribed HCLenTree causes a hang", + "344c4a4e494d4b070000ff2e2eff2e2e2e2e2eff", + "fail", + }, { + "issue 11030 - empty HDistTree unexpectedly leads to error", + "05c0070600000080400fff37a0ca", + "", + }, { + "issue 11033 - empty HDistTree unexpectedly leads to error", + "050fb109c020cca5d017dcbca044881ee1034ec149c8980bbc413c2ab35be9dc" + + "b1473449922449922411202306ee97b0383a521b4ffdcf3217f9f7d3adb701", + "3130303634342068652e706870005d05355f7ed957ff084a90925d19e3ebc6d0" + + "c6d7", + }} + + for i, tc := range testCases { + data, err := hex.DecodeString(tc.stream) + if err != nil { + t.Fatal(err) + } + data, err = ioutil.ReadAll(NewReader(bytes.NewReader(data))) + if tc.want == "fail" { + if err == nil { + t.Errorf("#%d (%s): got nil error, want non-nil", i, tc.desc) + } + } else { + if err != nil { + t.Errorf("#%d (%s): %v", i, tc.desc, err) + continue + } + if got := hex.EncodeToString(data); got != tc.want { + t.Errorf("#%d (%s):\ngot %q\nwant %q", i, tc.desc, got, tc.want) + } + + } } } diff --git a/src/compress/flate/gen.go b/src/compress/flate/gen.go index eeafa84a5d..154c89a488 100644 --- a/src/compress/flate/gen.go +++ b/src/compress/flate/gen.go @@ -45,6 +45,10 @@ type huffmanDecoder struct { } // Initialize Huffman decoding tables from array of code lengths. +// Following this function, h is guaranteed to be initialized into a complete +// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a +// degenerate case where the tree has only a single symbol with length 1. Empty +// trees are permitted. func (h *huffmanDecoder) init(bits []int) bool { // Sanity enables additional runtime tests during Huffman // table construction. It's intended to be used during @@ -71,8 +75,16 @@ func (h *huffmanDecoder) init(bits []int) bool { } count[n]++ } + + // Empty tree. The decompressor.huffSym function will fail later if the tree + // is used. Technically, an empty tree is only valid for the HDIST tree and + // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree + // is guaranteed to fail since it will attempt to use the tree to decode the + // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is + // guaranteed to fail later since the compressed data section must be + // composed of at least one symbol (the end-of-block marker). if max == 0 { - return false + return true } code := 0 diff --git a/src/compress/flate/huffman_bit_writer.go b/src/compress/flate/huffman_bit_writer.go index b182a710b9..616440412e 100644 --- a/src/compress/flate/huffman_bit_writer.go +++ b/src/compress/flate/huffman_bit_writer.go @@ -87,11 +87,11 @@ type huffmanBitWriter struct { func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { return &huffmanBitWriter{ w: w, - literalFreq: make([]int32, maxLit), + literalFreq: make([]int32, maxNumLit), offsetFreq: make([]int32, offsetCodeCount), - codegen: make([]uint8, maxLit+offsetCodeCount+1), + codegen: make([]uint8, maxNumLit+offsetCodeCount+1), codegenFreq: make([]int32, codegenCodeCount), - literalEncoding: newHuffmanEncoder(maxLit), + literalEncoding: newHuffmanEncoder(maxNumLit), offsetEncoding: newHuffmanEncoder(offsetCodeCount), codegenEncoding: newHuffmanEncoder(codegenCodeCount), } diff --git a/src/compress/flate/huffman_code.go b/src/compress/flate/huffman_code.go index 3b9fce466e..50ec79c940 100644 --- a/src/compress/flate/huffman_code.go +++ b/src/compress/flate/huffman_code.go @@ -47,11 +47,11 @@ func newHuffmanEncoder(size int) *huffmanEncoder { // Generates a HuffmanCode corresponding to the fixed literal table func generateFixedLiteralEncoding() *huffmanEncoder { - h := newHuffmanEncoder(maxLit) + h := newHuffmanEncoder(maxNumLit) codeBits := h.codeBits code := h.code var ch uint16 - for ch = 0; ch < maxLit; ch++ { + for ch = 0; ch < maxNumLit; ch++ { var bits uint16 var size uint8 switch { diff --git a/src/compress/flate/inflate.go b/src/compress/flate/inflate.go index 6f88159dfa..04372dec24 100644 --- a/src/compress/flate/inflate.go +++ b/src/compress/flate/inflate.go @@ -18,10 +18,12 @@ import ( const ( maxCodeLen = 16 // max length of Huffman code maxHist = 32768 // max history required - // The next three numbers come from the RFC, section 3.2.7. - maxLit = 286 - maxDist = 32 - numCodes = 19 // number of codes in Huffman meta-code + // The next three numbers come from the RFC section 3.2.7, with the + // additional proviso in section 3.2.5 which implies that distance codes + // 30 and 31 should never occur in compressed data. + maxNumLit = 286 + maxNumDist = 30 + numCodes = 19 // number of codes in Huffman meta-code ) // A CorruptInputError reports the presence of corrupt input at a given offset. @@ -101,6 +103,10 @@ type huffmanDecoder struct { } // Initialize Huffman decoding tables from array of code lengths. +// Following this function, h is guaranteed to be initialized into a complete +// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a +// degenerate case where the tree has only a single symbol with length 1. Empty +// trees are permitted. func (h *huffmanDecoder) init(bits []int) bool { // Sanity enables additional runtime tests during Huffman // table construction. It's intended to be used during @@ -127,8 +133,16 @@ func (h *huffmanDecoder) init(bits []int) bool { } count[n]++ } + + // Empty tree. The decompressor.huffSym function will fail later if the tree + // is used. Technically, an empty tree is only valid for the HDIST tree and + // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree + // is guaranteed to fail since it will attempt to use the tree to decode the + // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is + // guaranteed to fail later since the compressed data section must be + // composed of at least one symbol (the end-of-block marker). if max == 0 { - return false + return true } code := 0 @@ -258,7 +272,7 @@ type decompressor struct { h1, h2 huffmanDecoder // Length arrays used to define Huffman codes. - bits *[maxLit + maxDist]int + bits *[maxNumLit + maxNumDist]int codebits *[numCodes]int // Output history, buffer. @@ -356,12 +370,14 @@ func (f *decompressor) readHuffman() error { } } nlit := int(f.b&0x1F) + 257 - if nlit > maxLit { + if nlit > maxNumLit { return CorruptInputError(f.roffset) } f.b >>= 5 ndist := int(f.b&0x1F) + 1 - // maxDist is 32, so ndist is always valid. + if ndist > maxNumDist { + return CorruptInputError(f.roffset) + } f.b >>= 5 nclen := int(f.b&0xF) + 4 // numCodes is 19, so nclen is always valid. @@ -492,9 +508,12 @@ func (f *decompressor) huffmanBlock() { case v < 285: length = v*32 - (281*32 - 131) n = 5 - default: + case v < maxNumLit: length = 258 n = 0 + default: + f.err = CorruptInputError(f.roffset) + return } if n > 0 { for f.nb < n { @@ -529,10 +548,7 @@ func (f *decompressor) huffmanBlock() { switch { case dist < 4: dist++ - case dist >= 30: - f.err = CorruptInputError(f.roffset) - return - default: + case dist < maxNumDist: nb := uint(dist-2) >> 1 // have 1 bit in bottom of dist, need nb more. extra := (dist & 1) << nb @@ -546,6 +562,9 @@ func (f *decompressor) huffmanBlock() { f.b >>= nb f.nb -= nb dist = 1<<(nb+1) + 1 + extra + default: + f.err = CorruptInputError(f.roffset) + return } // Copy history[-dist:-dist+length] into output. @@ -692,6 +711,10 @@ func (f *decompressor) moreBits() error { // Read the next Huffman-encoded symbol from f according to h. func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. n := uint(h.min) for { for f.nb < n { @@ -761,7 +784,7 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error { // The ReadCloser returned by NewReader also implements Resetter. func NewReader(r io.Reader) io.ReadCloser { var f decompressor - f.bits = new([maxLit + maxDist]int) + f.bits = new([maxNumLit + maxNumDist]int) f.codebits = new([numCodes]int) f.r = makeReader(r) f.hist = new([maxHist]byte) @@ -780,7 +803,7 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { var f decompressor f.r = makeReader(r) f.hist = new([maxHist]byte) - f.bits = new([maxLit + maxDist]int) + f.bits = new([maxNumLit + maxNumDist]int) f.codebits = new([numCodes]int) f.step = (*decompressor).nextBlock f.setDict(dict)