mirror of
https://github.com/golang/go
synced 2024-11-23 15:00:03 -07:00
compress/flate: make library RFC1951 compliant
Corrected several issues:
* RFC1951 section 3.2.7 dictates that it is okay for the HDist tree to have a
single code of zero bits. Furthermore, the behavior of the C zlib library
permits empty trees even when there are more than one codes.
* RFC1951 section 3.2.5 shows that HLit codes 286 and 287 are invalid. Thus,
Go's implementation should choke on inputs using these codes.
* RFC1951 section 3.2.5 and 3.2.7 are ambiguous about whether the number of
HDist codes can be greater than 30. The C zlib library (which is the canonical
reference implementation) performs this check here:
62d6112a79/inflate.c (L906)
In addition, a number of test cases were added to the unit tests that exercises
these edge cases. The test cases listed in TestStreams will either fail or
succeed in a manner matching the behaviour of the C zlib version. Given that the
C zlib implementation is the reference for the world, Go's implementation should
match C zlib behaviour.
Fixes #11030
Change-Id: Ic24e4e40ce5832c7e1930249246e86d34bfedaa6
Reviewed-on: https://go-review.googlesource.com/11000
Reviewed-by: Nigel Tao <nigeltao@golang.org>
This commit is contained in:
parent
10076b55ec
commit
b03129aa27
@ -10,23 +10,11 @@ package flate
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/hex"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestUncompressedSource(t *testing.T) {
|
||||
decoder := NewReader(bytes.NewReader([]byte{0x01, 0x01, 0x00, 0xfe, 0xff, 0x11}))
|
||||
output := make([]byte, 1)
|
||||
n, error := decoder.Read(output)
|
||||
if n != 1 || error != nil {
|
||||
t.Fatalf("decoder.Read() = %d, %v, want 1, nil", n, error)
|
||||
}
|
||||
if output[0] != 0x11 {
|
||||
t.Errorf("output[0] = %x, want 0x11", output[0])
|
||||
}
|
||||
}
|
||||
|
||||
// The following test should not panic.
|
||||
func TestIssue5915(t *testing.T) {
|
||||
bits := []int{4, 0, 0, 6, 4, 3, 2, 3, 3, 4, 4, 5, 0, 0, 0, 0, 5, 5, 6,
|
||||
@ -90,29 +78,183 @@ func TestInvalidBits(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestDegenerateHuffmanCoding(t *testing.T) {
|
||||
const (
|
||||
want = "abcabc"
|
||||
// This compressed form has a dynamic Huffman block, even though a
|
||||
// sensible encoder would use a literal data block, as the latter is
|
||||
// shorter. Still, it is a valid flate compression of "abcabc". It has
|
||||
// a degenerate Huffman table with only one coded value: the one
|
||||
// non-literal back-ref copy of the first "abc" to the second "abc".
|
||||
//
|
||||
// To verify that this is decompressible with zlib (the C library),
|
||||
// it's easy to use the Python wrapper library:
|
||||
// >>> import zlib
|
||||
// >>> compressed = "\x0c\xc2...etc...\xff\xff"
|
||||
// >>> zlib.decompress(compressed, -15) # negative means no GZIP header.
|
||||
// 'abcabc'
|
||||
compressed = "\x0c\xc2\x01\x0d\x00\x00\x00\x82\xb0\xac\x4a\xff\x0e\xb0\x7d\x27" +
|
||||
"\x06\x00\x00\xff\xff"
|
||||
)
|
||||
b, err := ioutil.ReadAll(NewReader(strings.NewReader(compressed)))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if got := string(b); got != want {
|
||||
t.Fatalf("got %q, want %q", got, want)
|
||||
func TestStreams(t *testing.T) {
|
||||
// To verify any of these hexstrings as valid or invalid flate streams
|
||||
// according to the C zlib library, you can use the Python wrapper library:
|
||||
// >>> hex_string = "010100feff11"
|
||||
// >>> import zlib
|
||||
// >>> zlib.decompress(hex_string.decode("hex"), -15) # Negative means raw DEFLATE
|
||||
// '\x11'
|
||||
|
||||
testCases := []struct {
|
||||
desc string // Description of the stream
|
||||
stream string // Hexstring of the input DEFLATE stream
|
||||
want string // Expected result. Use "fail" to expect failure
|
||||
}{{
|
||||
"degenerate HCLenTree",
|
||||
"05e0010000000000100000000000000000000000000000000000000000000000" +
|
||||
"00000000000000000004",
|
||||
"fail",
|
||||
}, {
|
||||
"complete HCLenTree, empty HLitTree, empty HDistTree",
|
||||
"05e0010400000000000000000000000000000000000000000000000000000000" +
|
||||
"00000000000000000010",
|
||||
"fail",
|
||||
}, {
|
||||
"empty HCLenTree",
|
||||
"05e0010000000000000000000000000000000000000000000000000000000000" +
|
||||
"00000000000000000010",
|
||||
"fail",
|
||||
}, {
|
||||
"complete HCLenTree, complete HLitTree, empty HDistTree, use missing HDist symbol",
|
||||
"000100feff000de0010400000000100000000000000000000000000000000000" +
|
||||
"0000000000000000000000000000002c",
|
||||
"fail",
|
||||
}, {
|
||||
"complete HCLenTree, complete HLitTree, degenerate HDistTree, use missing HDist symbol",
|
||||
"000100feff000de0010000000000000000000000000000000000000000000000" +
|
||||
"00000000000000000610000000004070",
|
||||
"fail",
|
||||
}, {
|
||||
"complete HCLenTree, empty HLitTree, empty HDistTree",
|
||||
"05e0010400000000100400000000000000000000000000000000000000000000" +
|
||||
"0000000000000000000000000008",
|
||||
"fail",
|
||||
}, {
|
||||
"complete HCLenTree, empty HLitTree, degenerate HDistTree",
|
||||
"05e0010400000000100400000000000000000000000000000000000000000000" +
|
||||
"0000000000000000000800000008",
|
||||
"fail",
|
||||
}, {
|
||||
"complete HCLenTree, degenerate HLitTree, degenerate HDistTree, use missing HLit symbol",
|
||||
"05e0010400000000100000000000000000000000000000000000000000000000" +
|
||||
"0000000000000000001c",
|
||||
"fail",
|
||||
}, {
|
||||
"complete HCLenTree, complete HLitTree, too large HDistTree",
|
||||
"edff870500000000200400000000000000000000000000000000000000000000" +
|
||||
"000000000000000000080000000000000004",
|
||||
"fail",
|
||||
}, {
|
||||
"complete HCLenTree, complete HLitTree, empty HDistTree, excessive repeater code",
|
||||
"edfd870500000000200400000000000000000000000000000000000000000000" +
|
||||
"000000000000000000e8b100",
|
||||
"fail",
|
||||
}, {
|
||||
"complete HCLenTree, complete HLitTree, empty HDistTree of normal length 30",
|
||||
"05fd01240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" +
|
||||
"ffffffffffffffffff07000000fe01",
|
||||
"",
|
||||
}, {
|
||||
"complete HCLenTree, complete HLitTree, empty HDistTree of excessive length 31",
|
||||
"05fe01240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" +
|
||||
"ffffffffffffffffff07000000fc03",
|
||||
"fail",
|
||||
}, {
|
||||
"complete HCLenTree, over-subscribed HLitTree, empty HDistTree",
|
||||
"05e001240000000000fcffffffffffffffffffffffffffffffffffffffffffff" +
|
||||
"ffffffffffffffffff07f00f",
|
||||
"fail",
|
||||
}, {
|
||||
"complete HCLenTree, under-subscribed HLitTree, empty HDistTree",
|
||||
"05e001240000000000fcffffffffffffffffffffffffffffffffffffffffffff" +
|
||||
"fffffffffcffffffff07f00f",
|
||||
"fail",
|
||||
}, {
|
||||
"complete HCLenTree, complete HLitTree with single code, empty HDistTree",
|
||||
"05e001240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" +
|
||||
"ffffffffffffffffff07f00f",
|
||||
"01",
|
||||
}, {
|
||||
"complete HCLenTree, complete HLitTree with multiple codes, empty HDistTree",
|
||||
"05e301240000000000f8ffffffffffffffffffffffffffffffffffffffffffff" +
|
||||
"ffffffffffffffffff07807f",
|
||||
"01",
|
||||
}, {
|
||||
"complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HDist symbol",
|
||||
"000100feff000de0010400000000100000000000000000000000000000000000" +
|
||||
"0000000000000000000000000000003c",
|
||||
"00000000",
|
||||
}, {
|
||||
"complete HCLenTree, degenerate HLitTree, degenerate HDistTree",
|
||||
"05e0010400000000100000000000000000000000000000000000000000000000" +
|
||||
"0000000000000000000c",
|
||||
"",
|
||||
}, {
|
||||
"complete HCLenTree, degenerate HLitTree, empty HDistTree",
|
||||
"05e0010400000000100000000000000000000000000000000000000000000000" +
|
||||
"00000000000000000004",
|
||||
"",
|
||||
}, {
|
||||
"complete HCLenTree, complete HLitTree, empty HDistTree, spanning repeater code",
|
||||
"edfd870500000000200400000000000000000000000000000000000000000000" +
|
||||
"000000000000000000e8b000",
|
||||
"",
|
||||
}, {
|
||||
"complete HCLenTree with length codes, complete HLitTree, empty HDistTree",
|
||||
"ede0010400000000100000000000000000000000000000000000000000000000" +
|
||||
"0000000000000000000400004000",
|
||||
"",
|
||||
}, {
|
||||
"complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HLit symbol 284 with count 31",
|
||||
"000100feff00ede0010400000000100000000000000000000000000000000000" +
|
||||
"000000000000000000000000000000040000407f00",
|
||||
"0000000000000000000000000000000000000000000000000000000000000000" +
|
||||
"0000000000000000000000000000000000000000000000000000000000000000" +
|
||||
"0000000000000000000000000000000000000000000000000000000000000000" +
|
||||
"0000000000000000000000000000000000000000000000000000000000000000" +
|
||||
"0000000000000000000000000000000000000000000000000000000000000000" +
|
||||
"0000000000000000000000000000000000000000000000000000000000000000" +
|
||||
"0000000000000000000000000000000000000000000000000000000000000000" +
|
||||
"0000000000000000000000000000000000000000000000000000000000000000" +
|
||||
"000000",
|
||||
}, {
|
||||
"complete HCLenTree, complete HLitTree, degenerate HDistTree, use valid HLit and HDist symbols",
|
||||
"0cc2010d00000082b0ac4aff0eb07d27060000ffff",
|
||||
"616263616263",
|
||||
}, {
|
||||
"fixed block, use reserved symbol 287",
|
||||
"33180700",
|
||||
"fail",
|
||||
}, {
|
||||
"raw block",
|
||||
"010100feff11",
|
||||
"11",
|
||||
}, {
|
||||
"issue 10426 - over-subscribed HCLenTree causes a hang",
|
||||
"344c4a4e494d4b070000ff2e2eff2e2e2e2e2eff",
|
||||
"fail",
|
||||
}, {
|
||||
"issue 11030 - empty HDistTree unexpectedly leads to error",
|
||||
"05c0070600000080400fff37a0ca",
|
||||
"",
|
||||
}, {
|
||||
"issue 11033 - empty HDistTree unexpectedly leads to error",
|
||||
"050fb109c020cca5d017dcbca044881ee1034ec149c8980bbc413c2ab35be9dc" +
|
||||
"b1473449922449922411202306ee97b0383a521b4ffdcf3217f9f7d3adb701",
|
||||
"3130303634342068652e706870005d05355f7ed957ff084a90925d19e3ebc6d0" +
|
||||
"c6d7",
|
||||
}}
|
||||
|
||||
for i, tc := range testCases {
|
||||
data, err := hex.DecodeString(tc.stream)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
data, err = ioutil.ReadAll(NewReader(bytes.NewReader(data)))
|
||||
if tc.want == "fail" {
|
||||
if err == nil {
|
||||
t.Errorf("#%d (%s): got nil error, want non-nil", i, tc.desc)
|
||||
}
|
||||
} else {
|
||||
if err != nil {
|
||||
t.Errorf("#%d (%s): %v", i, tc.desc, err)
|
||||
continue
|
||||
}
|
||||
if got := hex.EncodeToString(data); got != tc.want {
|
||||
t.Errorf("#%d (%s):\ngot %q\nwant %q", i, tc.desc, got, tc.want)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -45,6 +45,10 @@ type huffmanDecoder struct {
|
||||
}
|
||||
|
||||
// Initialize Huffman decoding tables from array of code lengths.
|
||||
// Following this function, h is guaranteed to be initialized into a complete
|
||||
// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
|
||||
// degenerate case where the tree has only a single symbol with length 1. Empty
|
||||
// trees are permitted.
|
||||
func (h *huffmanDecoder) init(bits []int) bool {
|
||||
// Sanity enables additional runtime tests during Huffman
|
||||
// table construction. It's intended to be used during
|
||||
@ -71,8 +75,16 @@ func (h *huffmanDecoder) init(bits []int) bool {
|
||||
}
|
||||
count[n]++
|
||||
}
|
||||
|
||||
// Empty tree. The decompressor.huffSym function will fail later if the tree
|
||||
// is used. Technically, an empty tree is only valid for the HDIST tree and
|
||||
// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
|
||||
// is guaranteed to fail since it will attempt to use the tree to decode the
|
||||
// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
|
||||
// guaranteed to fail later since the compressed data section must be
|
||||
// composed of at least one symbol (the end-of-block marker).
|
||||
if max == 0 {
|
||||
return false
|
||||
return true
|
||||
}
|
||||
|
||||
code := 0
|
||||
|
@ -87,11 +87,11 @@ type huffmanBitWriter struct {
|
||||
func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
|
||||
return &huffmanBitWriter{
|
||||
w: w,
|
||||
literalFreq: make([]int32, maxLit),
|
||||
literalFreq: make([]int32, maxNumLit),
|
||||
offsetFreq: make([]int32, offsetCodeCount),
|
||||
codegen: make([]uint8, maxLit+offsetCodeCount+1),
|
||||
codegen: make([]uint8, maxNumLit+offsetCodeCount+1),
|
||||
codegenFreq: make([]int32, codegenCodeCount),
|
||||
literalEncoding: newHuffmanEncoder(maxLit),
|
||||
literalEncoding: newHuffmanEncoder(maxNumLit),
|
||||
offsetEncoding: newHuffmanEncoder(offsetCodeCount),
|
||||
codegenEncoding: newHuffmanEncoder(codegenCodeCount),
|
||||
}
|
||||
|
@ -47,11 +47,11 @@ func newHuffmanEncoder(size int) *huffmanEncoder {
|
||||
|
||||
// Generates a HuffmanCode corresponding to the fixed literal table
|
||||
func generateFixedLiteralEncoding() *huffmanEncoder {
|
||||
h := newHuffmanEncoder(maxLit)
|
||||
h := newHuffmanEncoder(maxNumLit)
|
||||
codeBits := h.codeBits
|
||||
code := h.code
|
||||
var ch uint16
|
||||
for ch = 0; ch < maxLit; ch++ {
|
||||
for ch = 0; ch < maxNumLit; ch++ {
|
||||
var bits uint16
|
||||
var size uint8
|
||||
switch {
|
||||
|
@ -18,10 +18,12 @@ import (
|
||||
const (
|
||||
maxCodeLen = 16 // max length of Huffman code
|
||||
maxHist = 32768 // max history required
|
||||
// The next three numbers come from the RFC, section 3.2.7.
|
||||
maxLit = 286
|
||||
maxDist = 32
|
||||
numCodes = 19 // number of codes in Huffman meta-code
|
||||
// The next three numbers come from the RFC section 3.2.7, with the
|
||||
// additional proviso in section 3.2.5 which implies that distance codes
|
||||
// 30 and 31 should never occur in compressed data.
|
||||
maxNumLit = 286
|
||||
maxNumDist = 30
|
||||
numCodes = 19 // number of codes in Huffman meta-code
|
||||
)
|
||||
|
||||
// A CorruptInputError reports the presence of corrupt input at a given offset.
|
||||
@ -101,6 +103,10 @@ type huffmanDecoder struct {
|
||||
}
|
||||
|
||||
// Initialize Huffman decoding tables from array of code lengths.
|
||||
// Following this function, h is guaranteed to be initialized into a complete
|
||||
// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
|
||||
// degenerate case where the tree has only a single symbol with length 1. Empty
|
||||
// trees are permitted.
|
||||
func (h *huffmanDecoder) init(bits []int) bool {
|
||||
// Sanity enables additional runtime tests during Huffman
|
||||
// table construction. It's intended to be used during
|
||||
@ -127,8 +133,16 @@ func (h *huffmanDecoder) init(bits []int) bool {
|
||||
}
|
||||
count[n]++
|
||||
}
|
||||
|
||||
// Empty tree. The decompressor.huffSym function will fail later if the tree
|
||||
// is used. Technically, an empty tree is only valid for the HDIST tree and
|
||||
// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
|
||||
// is guaranteed to fail since it will attempt to use the tree to decode the
|
||||
// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
|
||||
// guaranteed to fail later since the compressed data section must be
|
||||
// composed of at least one symbol (the end-of-block marker).
|
||||
if max == 0 {
|
||||
return false
|
||||
return true
|
||||
}
|
||||
|
||||
code := 0
|
||||
@ -258,7 +272,7 @@ type decompressor struct {
|
||||
h1, h2 huffmanDecoder
|
||||
|
||||
// Length arrays used to define Huffman codes.
|
||||
bits *[maxLit + maxDist]int
|
||||
bits *[maxNumLit + maxNumDist]int
|
||||
codebits *[numCodes]int
|
||||
|
||||
// Output history, buffer.
|
||||
@ -356,12 +370,14 @@ func (f *decompressor) readHuffman() error {
|
||||
}
|
||||
}
|
||||
nlit := int(f.b&0x1F) + 257
|
||||
if nlit > maxLit {
|
||||
if nlit > maxNumLit {
|
||||
return CorruptInputError(f.roffset)
|
||||
}
|
||||
f.b >>= 5
|
||||
ndist := int(f.b&0x1F) + 1
|
||||
// maxDist is 32, so ndist is always valid.
|
||||
if ndist > maxNumDist {
|
||||
return CorruptInputError(f.roffset)
|
||||
}
|
||||
f.b >>= 5
|
||||
nclen := int(f.b&0xF) + 4
|
||||
// numCodes is 19, so nclen is always valid.
|
||||
@ -492,9 +508,12 @@ func (f *decompressor) huffmanBlock() {
|
||||
case v < 285:
|
||||
length = v*32 - (281*32 - 131)
|
||||
n = 5
|
||||
default:
|
||||
case v < maxNumLit:
|
||||
length = 258
|
||||
n = 0
|
||||
default:
|
||||
f.err = CorruptInputError(f.roffset)
|
||||
return
|
||||
}
|
||||
if n > 0 {
|
||||
for f.nb < n {
|
||||
@ -529,10 +548,7 @@ func (f *decompressor) huffmanBlock() {
|
||||
switch {
|
||||
case dist < 4:
|
||||
dist++
|
||||
case dist >= 30:
|
||||
f.err = CorruptInputError(f.roffset)
|
||||
return
|
||||
default:
|
||||
case dist < maxNumDist:
|
||||
nb := uint(dist-2) >> 1
|
||||
// have 1 bit in bottom of dist, need nb more.
|
||||
extra := (dist & 1) << nb
|
||||
@ -546,6 +562,9 @@ func (f *decompressor) huffmanBlock() {
|
||||
f.b >>= nb
|
||||
f.nb -= nb
|
||||
dist = 1<<(nb+1) + 1 + extra
|
||||
default:
|
||||
f.err = CorruptInputError(f.roffset)
|
||||
return
|
||||
}
|
||||
|
||||
// Copy history[-dist:-dist+length] into output.
|
||||
@ -692,6 +711,10 @@ func (f *decompressor) moreBits() error {
|
||||
|
||||
// Read the next Huffman-encoded symbol from f according to h.
|
||||
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
|
||||
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
|
||||
// with single element, huffSym must error on these two edge cases. In both
|
||||
// cases, the chunks slice will be 0 for the invalid sequence, leading it
|
||||
// satisfy the n == 0 check below.
|
||||
n := uint(h.min)
|
||||
for {
|
||||
for f.nb < n {
|
||||
@ -761,7 +784,7 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error {
|
||||
// The ReadCloser returned by NewReader also implements Resetter.
|
||||
func NewReader(r io.Reader) io.ReadCloser {
|
||||
var f decompressor
|
||||
f.bits = new([maxLit + maxDist]int)
|
||||
f.bits = new([maxNumLit + maxNumDist]int)
|
||||
f.codebits = new([numCodes]int)
|
||||
f.r = makeReader(r)
|
||||
f.hist = new([maxHist]byte)
|
||||
@ -780,7 +803,7 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
|
||||
var f decompressor
|
||||
f.r = makeReader(r)
|
||||
f.hist = new([maxHist]byte)
|
||||
f.bits = new([maxLit + maxDist]int)
|
||||
f.bits = new([maxNumLit + maxNumDist]int)
|
||||
f.codebits = new([numCodes]int)
|
||||
f.step = (*decompressor).nextBlock
|
||||
f.setDict(dict)
|
||||
|
Loading…
Reference in New Issue
Block a user