mirror of
https://github.com/golang/go
synced 2024-11-20 04:04:41 -07:00
compress/lzw: implement a decoder.
R=rsc CC=bsiegert, golang-dev, mpl https://golang.org/cl/4182081
This commit is contained in:
parent
b8fa61885a
commit
658447ab66
@ -23,6 +23,7 @@ DIRS=\
|
||||
cmath\
|
||||
compress/flate\
|
||||
compress/gzip\
|
||||
compress/lzw \
|
||||
compress/zlib\
|
||||
container/heap\
|
||||
container/list\
|
||||
|
11
src/pkg/compress/lzw/Makefile
Normal file
11
src/pkg/compress/lzw/Makefile
Normal file
@ -0,0 +1,11 @@
|
||||
# Copyright 2011 The Go Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
include ../../../Make.inc
|
||||
|
||||
TARG=compress/lzw
|
||||
GOFILES=\
|
||||
reader.go\
|
||||
|
||||
include ../../../Make.pkg
|
211
src/pkg/compress/lzw/reader.go
Normal file
211
src/pkg/compress/lzw/reader.go
Normal file
@ -0,0 +1,211 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package lzw implements the Lempel-Ziv-Welch compressed data format,
|
||||
// described in T. A. Welch, ``A Technique for High-Performance Data
|
||||
// Compression'', Computer, 17(6) (June 1984), pp 8-19.
|
||||
//
|
||||
// In particular, it implements LZW as used by the GIF, TIFF and PDF file
|
||||
// formats, which means variable-width codes up to 12 bits and the first
|
||||
// two non-literal codes are a clear code and an EOF code.
|
||||
package lzw
|
||||
|
||||
// TODO(nigeltao): check that TIFF and PDF use LZW in the same way as GIF,
|
||||
// modulo LSB/MSB packing order.
|
||||
|
||||
// TODO(nigeltao): write an encoder.
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
)
|
||||
|
||||
// Order specifies the bit ordering in an LZW data stream.
|
||||
// NewReader accepts only the two values declared below; any other value
// makes the returned reader fail with "lzw: unknown order".
type Order int
|
||||
|
||||
const (
|
||||
// LSB means Least Significant Bits first, as used in the GIF file format.
|
||||
// It is the zero value of Order.
LSB Order = iota
|
||||
// MSB means Most Significant Bits first, as used in the TIFF and PDF
|
||||
// file formats.
|
||||
MSB
|
||||
)
|
||||
|
||||
// decoder is the state from which the readXxx method converts a byte
|
||||
// stream into a code stream.
|
||||
type decoder struct {
|
||||
// r is the source of compressed bytes; it is consumed one byte at a time.
r io.ByteReader
|
||||
// bits accumulates bytes read from r that have not yet been returned as
// codes. readLSB keeps pending bits at the low end, readMSB at the high end.
bits uint32
|
||||
// nBits is the number of pending bits currently held in bits.
nBits uint
|
||||
// width is the size in bits of the next code to extract.
width uint
|
||||
}
|
||||
|
||||
// readLSB returns the next code for "Least Significant Bits first" data.
|
||||
func (d *decoder) readLSB() (uint16, os.Error) {
|
||||
for d.nBits < d.width {
|
||||
c, err := d.r.ReadByte()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
d.bits |= uint32(c) << d.nBits
|
||||
d.nBits += 8
|
||||
}
|
||||
code := uint16(d.bits & (1<<d.width - 1))
|
||||
d.bits >>= d.width
|
||||
d.nBits -= d.width
|
||||
return code, nil
|
||||
}
|
||||
|
||||
// readMSB returns the next code for "Most Significant Bits first" data.
|
||||
func (d *decoder) readMSB() (uint16, os.Error) {
|
||||
for d.nBits < d.width {
|
||||
c, err := d.r.ReadByte()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
d.bits |= uint32(c) << (24 - d.nBits)
|
||||
d.nBits += 8
|
||||
}
|
||||
code := uint16(d.bits >> (32 - d.width))
|
||||
d.bits <<= d.width
|
||||
d.nBits -= d.width
|
||||
return code, nil
|
||||
}
|
||||
|
||||
// decode decompresses bytes from r and writes them to pw.
|
||||
// read specifies how to decode bytes into codes.
|
||||
// litWidth is the width in bits of literal codes.
|
||||
func decode(pw *io.PipeWriter, r io.ByteReader, read func(*decoder) (uint16, os.Error), litWidth uint) os.Error {
|
||||
const (
|
||||
maxWidth = 12
|
||||
invalidCode = 0xffff
|
||||
)
|
||||
d := decoder{r, 0, 0, 1 + litWidth}
|
||||
w := bufio.NewWriter(pw)
|
||||
// The first 1<<litWidth codes are literal codes.
|
||||
// The next two codes mean clear and EOF.
|
||||
// Other valid codes are in the range [lo, hi] where lo := clear + 2,
|
||||
// with the upper bound incrementing on each code seen.
|
||||
clear := uint16(1) << litWidth
|
||||
eof, hi := clear+1, clear+1
|
||||
// overflow is the code at which hi overflows the code width.
|
||||
overflow := uint16(1) << d.width
|
||||
var (
|
||||
// Each code c in [lo, hi] expands to two or more bytes. For c != hi:
|
||||
// suffix[c] is the last of these bytes.
|
||||
// prefix[c] is the code for all but the last byte.
|
||||
// This code can either be a literal code or another code in [lo, c).
|
||||
// The c == hi case is a special case.
|
||||
suffix [1 << maxWidth]uint8
|
||||
prefix [1 << maxWidth]uint16
|
||||
)
|
||||
|
||||
// Loop over the code stream, converting codes into decompressed bytes.
|
||||
last := uint16(invalidCode)
|
||||
for {
|
||||
code, err := read(&d)
|
||||
if err != nil {
|
||||
if err == os.EOF {
|
||||
err = io.ErrUnexpectedEOF
|
||||
}
|
||||
return err
|
||||
}
|
||||
switch {
|
||||
case code < clear:
|
||||
// We have a literal code.
|
||||
if err := w.WriteByte(uint8(code)); err != nil {
|
||||
return err
|
||||
}
|
||||
if last != invalidCode {
|
||||
// Save what the hi code expands to.
|
||||
suffix[hi] = uint8(code)
|
||||
prefix[hi] = last
|
||||
}
|
||||
case code == clear:
|
||||
d.width = 1 + litWidth
|
||||
hi = eof
|
||||
overflow = 1 << d.width
|
||||
last = invalidCode
|
||||
continue
|
||||
case code == eof:
|
||||
return w.Flush()
|
||||
case code <= hi:
|
||||
// buf is a scratch buffer for reconstituting the bytes that a code expands to.
|
||||
// Code suffixes are written right-to-left from the end of the buffer.
|
||||
var buf [1 << maxWidth]byte
|
||||
c, i := code, len(buf)-1
|
||||
if code == hi {
|
||||
// code == hi is a special case which expands to the last expansion
|
||||
// followed by the head of the last expansion. To find the head, we walk
|
||||
// the prefix chain until we find a literal code.
|
||||
c = last
|
||||
for c >= clear {
|
||||
c = prefix[c]
|
||||
}
|
||||
buf[i] = uint8(c)
|
||||
i--
|
||||
c = last
|
||||
}
|
||||
// Copy the suffix chain into buf and then write that to w.
|
||||
for c >= clear {
|
||||
buf[i] = suffix[c]
|
||||
i--
|
||||
c = prefix[c]
|
||||
}
|
||||
buf[i] = uint8(c)
|
||||
if _, err := w.Write(buf[i:]); err != nil {
|
||||
return err
|
||||
}
|
||||
// Save what the hi code expands to.
|
||||
suffix[hi] = uint8(c)
|
||||
prefix[hi] = last
|
||||
default:
|
||||
return os.NewError("lzw: invalid code")
|
||||
}
|
||||
last, hi = code, hi+1
|
||||
if hi == overflow {
|
||||
if d.width == maxWidth {
|
||||
return os.NewError("lzw: missing clear code")
|
||||
}
|
||||
d.width++
|
||||
overflow <<= 1
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
// NewReader returns a new ReadCloser that can be used to read the uncompressed
|
||||
// version of r. It is the caller's responsibility to call Close on the
|
||||
// ReadCloser when finished reading.
|
||||
// order is either LSB or MSB for Least or Most Significant Bits first packing
|
||||
// order. GIF uses LSB. TIFF and PDF use MSB.
|
||||
// litWidth is the width in bits for literal codes. Valid values range from
|
||||
// 2 to 8 inclusive.
|
||||
func NewReader(r io.Reader, order Order, litWidth int) io.ReadCloser {
|
||||
pr, pw := io.Pipe()
|
||||
var read func(*decoder) (uint16, os.Error)
|
||||
switch order {
|
||||
case LSB:
|
||||
read = (*decoder).readLSB
|
||||
case MSB:
|
||||
read = (*decoder).readMSB
|
||||
default:
|
||||
pw.CloseWithError(os.NewError("lzw: unknown order"))
|
||||
return pr
|
||||
}
|
||||
if litWidth < 2 || 8 < litWidth {
|
||||
pw.CloseWithError(fmt.Errorf("lzw: litWidth %d out of range", litWidth))
|
||||
return pr
|
||||
}
|
||||
go func() {
|
||||
br, ok := r.(io.ByteReader)
|
||||
if !ok {
|
||||
br = bufio.NewReader(r)
|
||||
}
|
||||
pw.CloseWithError(decode(pw, br, read, uint(litWidth)))
|
||||
}()
|
||||
return pr
|
||||
}
|
111
src/pkg/compress/lzw/reader_test.go
Normal file
111
src/pkg/compress/lzw/reader_test.go
Normal file
@ -0,0 +1,111 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package lzw
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// lzwTest is one decoding test case for TestReader.
type lzwTest struct {
|
||||
// desc has the form "name;order;litWidth"; TestReader splits it on ";" to
// obtain the bit order ("LSB" or "MSB") and the literal width.
desc string
|
||||
// raw is the expected decompressed output.
raw string
|
||||
// compressed is the LZW input handed to NewReader.
compressed string
|
||||
// err is the error expected from reading, or nil for a clean decode.
err os.Error
|
||||
}
|
||||
|
||||
var lzwTests = []lzwTest{
|
||||
{
|
||||
"empty;LSB;8",
|
||||
"",
|
||||
"\x01\x01",
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"empty;MSB;8",
|
||||
"",
|
||||
"\x80\x80",
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"tobe;LSB;7",
|
||||
"TOBEORNOTTOBEORTOBEORNOT",
|
||||
"\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81",
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"tobe;LSB;8",
|
||||
"TOBEORNOTTOBEORTOBEORNOT",
|
||||
"\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04\x12\x34\xb8\xb0\xe0\xc1\x84\x01\x01",
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"tobe;MSB;7",
|
||||
"TOBEORNOTTOBEORTOBEORNOT",
|
||||
"\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81",
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"tobe;MSB;8",
|
||||
"TOBEORNOTTOBEORTOBEORNOT",
|
||||
"\x2a\x13\xc8\x44\x52\x79\x48\x9c\x4f\x2a\x40\xa0\x90\x68\x5c\x16\x0f\x09\x80\x80",
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"tobe-truncated;LSB;8",
|
||||
"TOBEORNOTTOBEORTOBEORNOT",
|
||||
"\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04",
|
||||
io.ErrUnexpectedEOF,
|
||||
},
|
||||
// This example comes from http://en.wikipedia.org/wiki/Graphics_Interchange_Format.
|
||||
{
|
||||
"gif;LSB;8",
|
||||
"\x28\xff\xff\xff\x28\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
|
||||
"\x00\x51\xfc\x1b\x28\x70\xa0\xc1\x83\x01\x01",
|
||||
nil,
|
||||
},
|
||||
// This example comes from http://compgroups.net/comp.lang.ruby/Decompressing-LZW-compression-from-PDF-file
|
||||
{
|
||||
"pdf;MSB;8",
|
||||
"-----A---B",
|
||||
"\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01",
|
||||
nil,
|
||||
},
|
||||
}
|
||||
|
||||
func TestReader(t *testing.T) {
|
||||
b := bytes.NewBuffer(nil)
|
||||
for _, tt := range lzwTests {
|
||||
d := strings.Split(tt.desc, ";", -1)
|
||||
var order Order
|
||||
switch d[1] {
|
||||
case "LSB":
|
||||
order = LSB
|
||||
case "MSB":
|
||||
order = MSB
|
||||
default:
|
||||
t.Errorf("%s: bad order %q", tt.desc, d[1])
|
||||
}
|
||||
litWidth, _ := strconv.Atoi(d[2])
|
||||
rc := NewReader(strings.NewReader(tt.compressed), order, litWidth)
|
||||
defer rc.Close()
|
||||
b.Reset()
|
||||
n, err := io.Copy(b, rc)
|
||||
if err != nil {
|
||||
if err != tt.err {
|
||||
t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
s := b.String()
|
||||
if s != tt.raw {
|
||||
t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.desc, n, s, len(tt.raw), tt.raw)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user