mirror of
https://github.com/golang/go
synced 2024-11-24 08:50:14 -07:00
compress/flate: add pure huffman deflater
Add a "HuffmanOnly" compression level, where the input is only entropy encoded. The output is fully inflate compatible. Typical compression is reduction is about 50% of typical level 1 compression, however the compression time is very stable, and does not vary as much as nearly as much level 1 compression (or Snappy). This mode is useful for: * HTTP compression in a CPU limited environment. * Entropy encoding Snappy compressed data, for archiving, etc. * Compression where compression time needs to be predictable. * Fast network transfer. Snappy "usually" performs inbetween this and level 1 compression-wise, but at the same speed as "Huffman", so this is not a replacement, but a good supplement for Snappy, since it usually can compress Snappy output further. This is implemented as level -2, since this would be too much of a compression reduction to replace level 1. >go test -bench=Encode -cpu=1 BenchmarkEncodeDigitsHuffman1e4 30000 52334 ns/op 191.08 MB/s BenchmarkEncodeDigitsHuffman1e5 3000 518343 ns/op 192.92 MB/s BenchmarkEncodeDigitsHuffman1e6 300 5356884 ns/op 186.68 MB/s BenchmarkEncodeDigitsSpeed1e4 5000 324214 ns/op 30.84 MB/s BenchmarkEncodeDigitsSpeed1e5 500 3952614 ns/op 25.30 MB/s BenchmarkEncodeDigitsSpeed1e6 30 40760350 ns/op 24.53 MB/s BenchmarkEncodeDigitsDefault1e4 5000 387056 ns/op 25.84 MB/s BenchmarkEncodeDigitsDefault1e5 300 5950614 ns/op 16.80 MB/s BenchmarkEncodeDigitsDefault1e6 20 63842195 ns/op 15.66 MB/s BenchmarkEncodeDigitsCompress1e4 5000 391859 ns/op 25.52 MB/s BenchmarkEncodeDigitsCompress1e5 300 5707112 ns/op 17.52 MB/s BenchmarkEncodeDigitsCompress1e6 20 59839465 ns/op 16.71 MB/s BenchmarkEncodeTwainHuffman1e4 20000 73498 ns/op 136.06 MB/s BenchmarkEncodeTwainHuffman1e5 2000 595892 ns/op 167.82 MB/s BenchmarkEncodeTwainHuffman1e6 200 6059016 ns/op 165.04 MB/s BenchmarkEncodeTwainSpeed1e4 5000 321212 ns/op 31.13 MB/s BenchmarkEncodeTwainSpeed1e5 500 2823873 ns/op 35.41 MB/s BenchmarkEncodeTwainSpeed1e6 50 27237864 ns/op 36.71 MB/s BenchmarkEncodeTwainDefault1e4 3000 454634 ns/op 22.00 MB/s BenchmarkEncodeTwainDefault1e5 200 6859537 ns/op 14.58 MB/s BenchmarkEncodeTwainDefault1e6 20 71547405 ns/op 13.98 MB/s BenchmarkEncodeTwainCompress1e4 3000 462307 ns/op 21.63 MB/s BenchmarkEncodeTwainCompress1e5 200 7534992 ns/op 13.27 MB/s BenchmarkEncodeTwainCompress1e6 20 80353365 ns/op 12.45 MB/s PASS ok compress/flate 55.333s Change-Id: I8e12ad13220e50d4cf7ddba6f292333efad61b0c Reviewed-on: https://go-review.googlesource.com/20982 Reviewed-by: Joe Tsai <joetsai@digital-static.net> Reviewed-by: Nigel Tao <nigeltao@golang.org>
This commit is contained in:
parent
45d334ecf1
commit
42ad1dc01e
@ -15,6 +15,7 @@ const (
|
||||
BestSpeed = 1
|
||||
BestCompression = 9
|
||||
DefaultCompression = -1
|
||||
HuffmanOnly = -2 // Disables match search and only does Huffman entropy reduction.
|
||||
logWindowSize = 15
|
||||
windowSize = 1 << logWindowSize
|
||||
windowMask = windowSize - 1
|
||||
@ -462,6 +463,18 @@ func (d *compressor) store() {
|
||||
d.windowEnd = 0
|
||||
}
|
||||
|
||||
// storeHuff compresses and stores the currently added data
|
||||
// when the d.window is full or we are at the end of the stream.
|
||||
// Any error that occurred will be in d.err
|
||||
func (d *compressor) storeHuff() {
|
||||
if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
|
||||
return
|
||||
}
|
||||
d.w.writeBlockHuff(false, d.window[:d.windowEnd])
|
||||
d.err = d.w.err
|
||||
d.windowEnd = 0
|
||||
}
|
||||
|
||||
func (d *compressor) write(b []byte) (n int, err error) {
|
||||
if d.err != nil {
|
||||
return 0, d.err
|
||||
@ -500,6 +513,10 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
|
||||
d.window = make([]byte, maxStoreBlockSize)
|
||||
d.fill = (*compressor).fillStore
|
||||
d.step = (*compressor).store
|
||||
case level == HuffmanOnly:
|
||||
d.window = make([]byte, maxStoreBlockSize)
|
||||
d.fill = (*compressor).fillStore
|
||||
d.step = (*compressor).storeHuff
|
||||
case level == DefaultCompression:
|
||||
level = 6
|
||||
fallthrough
|
||||
@ -509,7 +526,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
|
||||
d.fill = (*compressor).fillDeflate
|
||||
d.step = (*compressor).deflate
|
||||
default:
|
||||
return fmt.Errorf("flate: invalid compression level %d: want value in range [-1, 9]", level)
|
||||
return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -565,10 +582,14 @@ func (d *compressor) close() error {
|
||||
// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression);
|
||||
// higher levels typically run slower but compress more. Level 0
|
||||
// (NoCompression) does not attempt any compression; it only adds the
|
||||
// necessary DEFLATE framing. Level -1 (DefaultCompression) uses the default
|
||||
// compression level.
|
||||
// necessary DEFLATE framing.
|
||||
// Level -1 (DefaultCompression) uses the default compression level.
|
||||
// Level -2 (HuffmanOnly) will use Huffman compression only, giving
|
||||
// a very fast compression for all types of input, but sacrificing considerable
|
||||
// compression efficiency.
|
||||
//
|
||||
// If level is in the range [-1, 9] then the error returned will be nil.
|
||||
//
|
||||
// If level is in the range [-2, 9] then the error returned will be nil.
|
||||
// Otherwise the error returned will be non-nil.
|
||||
func NewWriter(w io.Writer, level int) (*Writer, error) {
|
||||
var dw Writer
|
||||
|
@ -332,15 +332,17 @@ func testToFromWithLevelAndLimit(t *testing.T, level int, input []byte, name str
|
||||
testSync(t, level, input, name)
|
||||
}
|
||||
|
||||
func testToFromWithLimit(t *testing.T, input []byte, name string, limit [10]int) {
|
||||
func testToFromWithLimit(t *testing.T, input []byte, name string, limit [11]int) {
|
||||
for i := 0; i < 10; i++ {
|
||||
testToFromWithLevelAndLimit(t, i, input, name, limit[i])
|
||||
}
|
||||
// Test HuffmanCompression
|
||||
testToFromWithLevelAndLimit(t, -2, input, name, limit[10])
|
||||
}
|
||||
|
||||
func TestDeflateInflate(t *testing.T) {
|
||||
for i, h := range deflateInflateTests {
|
||||
testToFromWithLimit(t, h.in, fmt.Sprintf("#%d", i), [10]int{})
|
||||
testToFromWithLimit(t, h.in, fmt.Sprintf("#%d", i), [11]int{})
|
||||
}
|
||||
}
|
||||
|
||||
@ -356,19 +358,19 @@ func TestReverseBits(t *testing.T) {
|
||||
type deflateInflateStringTest struct {
|
||||
filename string
|
||||
label string
|
||||
limit [10]int
|
||||
limit [11]int
|
||||
}
|
||||
|
||||
var deflateInflateStringTests = []deflateInflateStringTest{
|
||||
{
|
||||
"../testdata/e.txt",
|
||||
"2.718281828...",
|
||||
[...]int{100018, 50650, 50960, 51150, 50930, 50790, 50790, 50790, 50790, 50790},
|
||||
[...]int{100018, 50650, 50960, 51150, 50930, 50790, 50790, 50790, 50790, 50790, 43683},
|
||||
},
|
||||
{
|
||||
"../testdata/Mark.Twain-Tom.Sawyer.txt",
|
||||
"Mark.Twain-Tom.Sawyer",
|
||||
[...]int{407330, 187598, 180361, 172974, 169160, 163476, 160936, 160506, 160295, 160295},
|
||||
[...]int{407330, 187598, 180361, 172974, 169160, 163476, 160936, 160506, 160295, 160295, 233460},
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -74,8 +74,12 @@ const (
|
||||
speed = BestSpeed
|
||||
default_ = DefaultCompression
|
||||
compress = BestCompression
|
||||
huffman = HuffmanOnly
|
||||
)
|
||||
|
||||
func BenchmarkDecodeDigitsHuffman1e4(b *testing.B) { benchmarkDecode(b, digits, huffman, 1e4) }
|
||||
func BenchmarkDecodeDigitsHuffman1e5(b *testing.B) { benchmarkDecode(b, digits, huffman, 1e5) }
|
||||
func BenchmarkDecodeDigitsHuffman1e6(b *testing.B) { benchmarkDecode(b, digits, huffman, 1e6) }
|
||||
func BenchmarkDecodeDigitsSpeed1e4(b *testing.B) { benchmarkDecode(b, digits, speed, 1e4) }
|
||||
func BenchmarkDecodeDigitsSpeed1e5(b *testing.B) { benchmarkDecode(b, digits, speed, 1e5) }
|
||||
func BenchmarkDecodeDigitsSpeed1e6(b *testing.B) { benchmarkDecode(b, digits, speed, 1e6) }
|
||||
@ -85,6 +89,9 @@ func BenchmarkDecodeDigitsDefault1e6(b *testing.B) { benchmarkDecode(b, digits,
|
||||
func BenchmarkDecodeDigitsCompress1e4(b *testing.B) { benchmarkDecode(b, digits, compress, 1e4) }
|
||||
func BenchmarkDecodeDigitsCompress1e5(b *testing.B) { benchmarkDecode(b, digits, compress, 1e5) }
|
||||
func BenchmarkDecodeDigitsCompress1e6(b *testing.B) { benchmarkDecode(b, digits, compress, 1e6) }
|
||||
func BenchmarkDecodeTwainHuffman1e4(b *testing.B) { benchmarkDecode(b, twain, huffman, 1e4) }
|
||||
func BenchmarkDecodeTwainHuffman1e5(b *testing.B) { benchmarkDecode(b, twain, huffman, 1e5) }
|
||||
func BenchmarkDecodeTwainHuffman1e6(b *testing.B) { benchmarkDecode(b, twain, huffman, 1e6) }
|
||||
func BenchmarkDecodeTwainSpeed1e4(b *testing.B) { benchmarkDecode(b, twain, speed, 1e4) }
|
||||
func BenchmarkDecodeTwainSpeed1e5(b *testing.B) { benchmarkDecode(b, twain, speed, 1e5) }
|
||||
func BenchmarkDecodeTwainSpeed1e6(b *testing.B) { benchmarkDecode(b, twain, speed, 1e6) }
|
||||
|
@ -45,6 +45,9 @@ func benchmarkEncoder(b *testing.B, testfile, level, n int) {
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkEncodeDigitsHuffman1e4(b *testing.B) { benchmarkEncoder(b, digits, huffman, 1e4) }
|
||||
func BenchmarkEncodeDigitsHuffman1e5(b *testing.B) { benchmarkEncoder(b, digits, huffman, 1e5) }
|
||||
func BenchmarkEncodeDigitsHuffman1e6(b *testing.B) { benchmarkEncoder(b, digits, huffman, 1e6) }
|
||||
func BenchmarkEncodeDigitsSpeed1e4(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e4) }
|
||||
func BenchmarkEncodeDigitsSpeed1e5(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e5) }
|
||||
func BenchmarkEncodeDigitsSpeed1e6(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e6) }
|
||||
@ -54,6 +57,9 @@ func BenchmarkEncodeDigitsDefault1e6(b *testing.B) { benchmarkEncoder(b, digits
|
||||
func BenchmarkEncodeDigitsCompress1e4(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e4) }
|
||||
func BenchmarkEncodeDigitsCompress1e5(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e5) }
|
||||
func BenchmarkEncodeDigitsCompress1e6(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e6) }
|
||||
func BenchmarkEncodeTwainHuffman1e4(b *testing.B) { benchmarkEncoder(b, twain, huffman, 1e4) }
|
||||
func BenchmarkEncodeTwainHuffman1e5(b *testing.B) { benchmarkEncoder(b, twain, huffman, 1e5) }
|
||||
func BenchmarkEncodeTwainHuffman1e6(b *testing.B) { benchmarkEncoder(b, twain, huffman, 1e6) }
|
||||
func BenchmarkEncodeTwainSpeed1e4(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e4) }
|
||||
func BenchmarkEncodeTwainSpeed1e5(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e5) }
|
||||
func BenchmarkEncodeTwainSpeed1e6(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e6) }
|
||||
@ -131,16 +137,17 @@ func TestWriteError(t *testing.T) {
|
||||
|
||||
// Test if two runs produce identical results
|
||||
// even when writing different sizes to the Writer.
|
||||
func TestDeterministicL0(t *testing.T) { testDeterministic(0, t) }
|
||||
func TestDeterministicL1(t *testing.T) { testDeterministic(1, t) }
|
||||
func TestDeterministicL2(t *testing.T) { testDeterministic(2, t) }
|
||||
func TestDeterministicL3(t *testing.T) { testDeterministic(3, t) }
|
||||
func TestDeterministicL4(t *testing.T) { testDeterministic(4, t) }
|
||||
func TestDeterministicL5(t *testing.T) { testDeterministic(5, t) }
|
||||
func TestDeterministicL6(t *testing.T) { testDeterministic(6, t) }
|
||||
func TestDeterministicL7(t *testing.T) { testDeterministic(7, t) }
|
||||
func TestDeterministicL8(t *testing.T) { testDeterministic(8, t) }
|
||||
func TestDeterministicL9(t *testing.T) { testDeterministic(9, t) }
|
||||
func TestDeterministicL0(t *testing.T) { testDeterministic(0, t) }
|
||||
func TestDeterministicL1(t *testing.T) { testDeterministic(1, t) }
|
||||
func TestDeterministicL2(t *testing.T) { testDeterministic(2, t) }
|
||||
func TestDeterministicL3(t *testing.T) { testDeterministic(3, t) }
|
||||
func TestDeterministicL4(t *testing.T) { testDeterministic(4, t) }
|
||||
func TestDeterministicL5(t *testing.T) { testDeterministic(5, t) }
|
||||
func TestDeterministicL6(t *testing.T) { testDeterministic(6, t) }
|
||||
func TestDeterministicL7(t *testing.T) { testDeterministic(7, t) }
|
||||
func TestDeterministicL8(t *testing.T) { testDeterministic(8, t) }
|
||||
func TestDeterministicL9(t *testing.T) { testDeterministic(9, t) }
|
||||
func TestDeterministicLM2(t *testing.T) { testDeterministic(-2, t) }
|
||||
|
||||
func testDeterministic(i int, t *testing.T) {
|
||||
// Test so much we cross a good number of block boundaries.
|
||||
|
Loading…
Reference in New Issue
Block a user