From 8bf58725b23a25a4d153a9b90624eb95c0aeca12 Mon Sep 17 00:00:00 2001
From: Nigel Tao <nigeltao@golang.org>
Date: Thu, 17 Dec 2009 10:32:17 +1100
Subject: [PATCH] Basic image/jpeg decoder.

This is not a complete JPEG implementation (e.g. it does not handle
progressive JPEGs or restart markers), but I was able to take a photo
with my phone, and view the resultant JPEG in pure Go.

The decoder is simple, but slow. The Huffman decoder in particular
should be easily improvable, but optimization is left to future
changelists. Being able to inline functions in the inner loop should
also help performance.

The output is not pixel-for-pixel identical to libjpeg, although
identical behavior isn't necessarily a goal, since JPEG is a lossy
codec. There are at least two reasons for the discrepancy.

First, the inverse DCT algorithm used is the same as Plan9's
src/cmd/jpg, which has different rounding errors from libjpeg's
default IDCT implementation. Note that libjpeg actually has three
different IDCT implementations: one floating point, and two fixed
point. Out of those four, Plan9's seemed the simplest to understand,
partly because it has no #ifdef's or C macros.

Second, for 4:2:2 or 4:2:0 chroma sampling, this implementation does
nearest neighbor upsampling, compared to libjpeg's triangle filter
(e.g. see h2v1_fancy_upsample in jdsample.c).

The difference from the first reason is typically zero, but sometimes
1 (out of 256) in YCbCr space, or double that in RGB space. The
difference from the second reason can be as large as 8/256 in YCbCr
space, in regions of steep chroma gradients. Informal eyeballing
suggests that the net difference is typically imperceptible, though.

R=r
CC=golang-dev, rsc
https://golang.org/cl/164056
---
 src/pkg/Makefile              |   2 +
 src/pkg/image/jpeg/Makefile   |  13 ++
 src/pkg/image/jpeg/huffman.go | 190 ++++++++++++++++
 src/pkg/image/jpeg/idct.go    | 190 ++++++++++++++++
 src/pkg/image/jpeg/reader.go  | 392 ++++++++++++++++++++++++++++++++++
 5 files changed, 787 insertions(+)
 create mode 100644 src/pkg/image/jpeg/Makefile
 create mode 100644 src/pkg/image/jpeg/huffman.go
 create mode 100644 src/pkg/image/jpeg/idct.go
 create mode 100644 src/pkg/image/jpeg/reader.go

diff --git a/src/pkg/Makefile b/src/pkg/Makefile
index db33ab26299..f37502d58d4 100644
--- a/src/pkg/Makefile
+++ b/src/pkg/Makefile
@@ -72,6 +72,7 @@ DIRS=\
 	hash/crc32\
 	http\
 	image\
+	image/jpeg\
 	image/png\
 	io\
 	io/ioutil\
@@ -117,6 +118,7 @@ NOTEST=\
 	go/token\
 	hash\
 	image\
+	image/jpeg\
 	malloc\
 	rand\
 	runtime\
diff --git a/src/pkg/image/jpeg/Makefile b/src/pkg/image/jpeg/Makefile
new file mode 100644
index 00000000000..c84811d6ada
--- /dev/null
+++ b/src/pkg/image/jpeg/Makefile
@@ -0,0 +1,13 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include $(GOROOT)/src/Make.$(GOARCH)
+
+TARG=image/jpeg
+GOFILES=\
+	huffman.go\
+	idct.go\
+	reader.go\
+
+include $(GOROOT)/src/Make.pkg
diff --git a/src/pkg/image/jpeg/huffman.go b/src/pkg/image/jpeg/huffman.go
new file mode 100644
index 00000000000..0d03a7317e2
--- /dev/null
+++ b/src/pkg/image/jpeg/huffman.go
@@ -0,0 +1,190 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package jpeg
+
+import (
+	"io"
+	"os"
+)
+
+// Each code is at most 16 bits long.
+const maxCodeLength = 16
+
+// Each decoded value is a uint8, so there are at most 256 such values.
+const maxNumValues = 256
+
+// Bit stream for the Huffman decoder.
+// The n least significant bits of a form the unread bits, to be read in MSB to LSB order.
+type bits struct {
+	a int // accumulator.
+	n int // the number of unread bits in a.
+	m int // mask. m==1<<(n-1) when n>0, with m==0 when n==0.
+}
+
+// Huffman table decoder, specified in section C.
+type huffman struct {
+	l        [maxCodeLength]int
+	length   int                 // sum of l[i].
+	val      [maxNumValues]uint8 // the decoded values, as sorted by their encoding.
+	size     [maxNumValues]int   // size[i] is the number of bits to encode val[i].
+	code     [maxNumValues]int   // code[i] is the encoding of val[i].
+	minCode  [maxCodeLength]int  // min codes of length i, or -1 if no codes of that length.
+	maxCode  [maxCodeLength]int  // max codes of length i, or -1 if no codes of that length.
+	valIndex [maxCodeLength]int  // index into val of minCode[i].
+}
+
+// Reads bytes from the io.Reader to ensure that bits.n is at least n.
+func (d *decoder) ensureNBits(n int) os.Error {
+	for d.b.n < n {
+		c, err := d.r.ReadByte()
+		if err != nil {
+			return err
+		}
+		d.b.a = d.b.a<<8 | int(c)
+		d.b.n += 8
+		if d.b.m == 0 {
+			d.b.m = 1 << 7
+		} else {
+			d.b.m <<= 8
+		}
+		// Byte stuffing, specified in section F.1.2.3.
+		if c == 0xff {
+			c, err = d.r.ReadByte()
+			if err != nil {
+				return err
+			}
+			if c != 0x00 {
+				return FormatError("missing 0xff00 sequence")
+			}
+		}
+	}
+	return nil
+}
+
+// The composition of RECEIVE and EXTEND, specified in section F.2.2.1.
+func (d *decoder) receiveExtend(t uint8) (int, os.Error) {
+	err := d.ensureNBits(int(t))
+	if err != nil {
+		return 0, err
+	}
+	d.b.n -= int(t)
+	d.b.m >>= t
+	s := 1 << t
+	x := (d.b.a >> uint8(d.b.n)) & (s - 1)
+	if x < s>>1 {
+		x += ((-1) << t) + 1
+	}
+	return x, nil
+}
+
+// Processes a Define Huffman Table marker, and initializes a huffman struct from its contents.
+// Specified in section B.2.4.2.
+func (d *decoder) processDHT(n int) os.Error {
+	for n > 0 {
+		if n < 17 {
+			return FormatError("DHT has wrong length")
+		}
+		_, err := io.ReadFull(d.r, d.tmp[0:17])
+		if err != nil {
+			return err
+		}
+		tc := d.tmp[0] >> 4
+		if tc > maxTc {
+			return FormatError("bad Tc value")
+		}
+		th := d.tmp[0] & 0x0f
+		const isBaseline = true // Progressive mode is not yet supported.
+		if th > maxTh || isBaseline && th > 1 {
+			return FormatError("bad Th value")
+		}
+		h := &d.huff[tc][th]
+
+		// Read l and val (and derive length).
+		h.length = 0
+		for i := 0; i < maxCodeLength; i++ {
+			h.l[i] = int(d.tmp[i+1])
+			h.length += h.l[i]
+		}
+		if h.length == 0 {
+			return FormatError("Huffman table has zero length")
+		}
+		if h.length > maxNumValues {
+			return FormatError("Huffman table has excessive length")
+		}
+		n -= h.length + 17
+		if n < 0 {
+			return FormatError("DHT has wrong length")
+		}
+		_, err = io.ReadFull(d.r, h.val[0:h.length])
+		if err != nil {
+			return err
+		}
+
+		// Derive size.
+		k := 0
+		for i := 0; i < maxCodeLength; i++ {
+			for j := 0; j < h.l[i]; j++ {
+				h.size[k] = i + 1
+				k++
+			}
+		}
+
+		// Derive code.
+		code := 0
+		size := h.size[0]
+		for i := 0; i < h.length; i++ {
+			if size != h.size[i] {
+				code <<= uint8(h.size[i] - size)
+				size = h.size[i]
+			}
+			h.code[i] = code
+			code++
+		}
+
+		// Derive minCode, maxCode, and valIndex.
+		k = 0
+		index := 0
+		for i := 0; i < maxCodeLength; i++ {
+			if h.l[i] == 0 {
+				h.minCode[i] = -1
+				h.maxCode[i] = -1
+				h.valIndex[i] = -1
+			} else {
+				h.minCode[i] = k
+				h.maxCode[i] = k + h.l[i] - 1
+				h.valIndex[i] = index
+				k += h.l[i]
+				index += h.l[i]
+			}
+			k <<= 1
+		}
+	}
+	return nil
+}
+
+// Returns the next Huffman-coded value from the bit stream, decoded according to h.
+// TODO(nigeltao): This decoding algorithm is simple, but slow. A lookahead table, instead of always
+// peeling off only 1 bit at at time, ought to be faster.
+func (d *decoder) decodeHuffman(h *huffman) (uint8, os.Error) {
+	if h.length == 0 {
+		return 0, FormatError("uninitialized Huffman table")
+	}
+	for i, code := 0, 0; i < maxCodeLength; i++ {
+		err := d.ensureNBits(1)
+		if err != nil {
+			return 0, err
+		}
+		if d.b.a&d.b.m != 0 {
+			code |= 1
+		}
+		d.b.n--
+		d.b.m >>= 1
+		if code <= h.maxCode[i] {
+			return h.val[h.valIndex[i]+code-h.minCode[i]], nil
+		}
+		code <<= 1
+	}
+	return 0, FormatError("bad Huffman code")
+}
diff --git a/src/pkg/image/jpeg/idct.go b/src/pkg/image/jpeg/idct.go
new file mode 100644
index 00000000000..5189931105b
--- /dev/null
+++ b/src/pkg/image/jpeg/idct.go
@@ -0,0 +1,190 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This is a Go translation of idct.c from
+//
+// http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/Video/verifier/mpeg2decode_960109.tar.gz
+//
+// which carries the following notice:
+
+/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
+
+/*
+ * Disclaimer of Warranty
+ *
+ * These software programs are available to the user without any license fee or
+ * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
+ * any and all warranties, whether express, implied, or statuary, including any
+ * implied warranties or merchantability or of fitness for a particular
+ * purpose.  In no event shall the copyright-holder be liable for any
+ * incidental, punitive, or consequential damages of any kind whatsoever
+ * arising from the use of these programs.
+ *
+ * This disclaimer of warranty extends to the user of these programs and user's
+ * customers, employees, agents, transferees, successors, and assigns.
+ *
+ * The MPEG Software Simulation Group does not represent or warrant that the
+ * programs furnished hereunder are free of infringement of any third-party
+ * patents.
+ *
+ * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
+ * are subject to royalty fees to patent holders.  Many of these patents are
+ * general enough such that they are unavoidable regardless of implementation
+ * design.
+ *
+ */
+
+package jpeg
+
+const (
+	w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16)
+	w2 = 2676 // 2048*sqrt(2)*cos(2*pi/16)
+	w3 = 2408 // 2048*sqrt(2)*cos(3*pi/16)
+	w5 = 1609 // 2048*sqrt(2)*cos(5*pi/16)
+	w6 = 1108 // 2048*sqrt(2)*cos(6*pi/16)
+	w7 = 565  // 2048*sqrt(2)*cos(7*pi/16)
+
+	w1pw7 = w1 + w7
+	w1mw7 = w1 - w7
+	w2pw6 = w2 + w6
+	w2mw6 = w2 - w6
+	w3pw5 = w3 + w5
+	w3mw5 = w3 - w5
+
+	r2 = 181 // 256/sqrt(2)
+)
+
+// 2-D Inverse Discrete Cosine Transformation, followed by a +128 level shift.
+//
+// The input coefficients should already have been multiplied by the appropriate quantization table.
+// We use fixed-point computation, with the number of bits for the fractional component varying over the
+// intermediate stages. The final values are expected to range within [0, 255], after a +128 level shift.
+//
+// For more on the actual algorithm, see Z. Wang, "Fast algorithms for the discrete W transform and
+// for the discrete Fourier transform", IEEE Trans. on ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984.
+func idct(b *[blockSize]int) {
+	// Horizontal 1-D IDCT.
+	for y := 0; y < 8; y++ {
+		// If all the AC components are zero, then the IDCT is trivial.
+		if b[y*8+1] == 0 && b[y*8+2] == 0 && b[y*8+3] == 0 &&
+			b[y*8+4] == 0 && b[y*8+5] == 0 && b[y*8+6] == 0 && b[y*8+7] == 0 {
+			dc := b[y*8+0] << 3
+			b[y*8+0] = dc
+			b[y*8+1] = dc
+			b[y*8+2] = dc
+			b[y*8+3] = dc
+			b[y*8+4] = dc
+			b[y*8+5] = dc
+			b[y*8+6] = dc
+			b[y*8+7] = dc
+			continue
+		}
+
+		// Prescale.
+		x0 := (b[y*8+0] << 11) + 128
+		x1 := b[y*8+4] << 11
+		x2 := b[y*8+6]
+		x3 := b[y*8+2]
+		x4 := b[y*8+1]
+		x5 := b[y*8+7]
+		x6 := b[y*8+5]
+		x7 := b[y*8+3]
+
+		// Stage 1.
+		x8 := w7 * (x4 + x5)
+		x4 = x8 + w1mw7*x4
+		x5 = x8 - w1pw7*x5
+		x8 = w3 * (x6 + x7)
+		x6 = x8 - w3mw5*x6
+		x7 = x8 - w3pw5*x7
+
+		// Stage 2.
+		x8 = x0 + x1
+		x0 -= x1
+		x1 = w6 * (x3 + x2)
+		x2 = x1 - w2pw6*x2
+		x3 = x1 + w2mw6*x3
+		x1 = x4 + x6
+		x4 -= x6
+		x6 = x5 + x7
+		x5 -= x7
+
+		// Stage 3.
+		x7 = x8 + x3
+		x8 -= x3
+		x3 = x0 + x2
+		x0 -= x2
+		x2 = (r2*(x4+x5) + 128) >> 8
+		x4 = (r2*(x4-x5) + 128) >> 8
+
+		// Stage 4.
+		b[8*y+0] = (x7 + x1) >> 8
+		b[8*y+1] = (x3 + x2) >> 8
+		b[8*y+2] = (x0 + x4) >> 8
+		b[8*y+3] = (x8 + x6) >> 8
+		b[8*y+4] = (x8 - x6) >> 8
+		b[8*y+5] = (x0 - x4) >> 8
+		b[8*y+6] = (x3 - x2) >> 8
+		b[8*y+7] = (x7 - x1) >> 8
+	}
+
+	// Vertical 1-D IDCT.
+	for x := 0; x < 8; x++ {
+		// Similar to the horizontal 1-D IDCT case, if all the AC components are zero, then the IDCT is trivial.
+		// However, after performing the horizontal 1-D IDCT, there are typically non-zero AC components, so
+		// we do not bother to check for the all-zero case.
+
+		// Prescale.
+		y0 := (b[8*0+x] << 8) + 8192
+		y1 := b[8*4+x] << 8
+		y2 := b[8*6+x]
+		y3 := b[8*2+x]
+		y4 := b[8*1+x]
+		y5 := b[8*7+x]
+		y6 := b[8*5+x]
+		y7 := b[8*3+x]
+
+		// Stage 1.
+		y8 := w7*(y4+y5) + 4
+		y4 = (y8 + w1mw7*y4) >> 3
+		y5 = (y8 - w1pw7*y5) >> 3
+		y8 = w3*(y6+y7) + 4
+		y6 = (y8 - w3mw5*y6) >> 3
+		y7 = (y8 - w3pw5*y7) >> 3
+
+		// Stage 2.
+		y8 = y0 + y1
+		y0 -= y1
+		y1 = w6*(y3+y2) + 4
+		y2 = (y1 - w2pw6*y2) >> 3
+		y3 = (y1 + w2mw6*y3) >> 3
+		y1 = y4 + y6
+		y4 -= y6
+		y6 = y5 + y7
+		y5 -= y7
+
+		// Stage 3.
+		y7 = y8 + y3
+		y8 -= y3
+		y3 = y0 + y2
+		y0 -= y2
+		y2 = (r2*(y4+y5) + 128) >> 8
+		y4 = (r2*(y4-y5) + 128) >> 8
+
+		// Stage 4.
+		b[8*0+x] = (y7 + y1) >> 14
+		b[8*1+x] = (y3 + y2) >> 14
+		b[8*2+x] = (y0 + y4) >> 14
+		b[8*3+x] = (y8 + y6) >> 14
+		b[8*4+x] = (y8 - y6) >> 14
+		b[8*5+x] = (y0 - y4) >> 14
+		b[8*6+x] = (y3 - y2) >> 14
+		b[8*7+x] = (y7 - y1) >> 14
+	}
+
+	// Level shift.
+	for i := range *b {
+		b[i] += 128
+	}
+}
diff --git a/src/pkg/image/jpeg/reader.go b/src/pkg/image/jpeg/reader.go
new file mode 100644
index 00000000000..7e640d9bac5
--- /dev/null
+++ b/src/pkg/image/jpeg/reader.go
@@ -0,0 +1,392 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The jpeg package implements a decoder for JPEG images, as defined in ITU-T T.81.
+package jpeg
+
+// See http://www.w3.org/Graphics/JPEG/itu-t81.pdf
+
+import (
+	"bufio"
+	"image"
+	"io"
+	"os"
+)
+
+// A FormatError reports that the input is not a valid JPEG.
+type FormatError string
+
+func (e FormatError) String() string { return "invalid JPEG format: " + string(e) }
+
+// An UnsupportedError reports that the input uses a valid but unimplemented JPEG feature.
+type UnsupportedError string
+
+func (e UnsupportedError) String() string { return "unsupported JPEG feature: " + string(e) }
+
+// Component specification, specified in section B.2.2.
+type component struct {
+	c  uint8 // Component identifier.
+	h  uint8 // Horizontal sampling factor.
+	v  uint8 // Vertical sampling factor.
+	tq uint8 // Quantization table destination selector.
+}
+
+const (
+	blockSize = 64 // A DCT block is 8x8.
+
+	dcTableClass = 0
+	acTableClass = 1
+	maxTc        = 1
+	maxTh        = 3
+	maxTq        = 3
+
+	// We only support 4:4:4, 4:2:2 and 4:2:0 downsampling, and assume that the components are Y, Cb, Cr.
+	nComponent = 3
+	maxH       = 2
+	maxV       = 2
+)
+
+const (
+	soiMarker   = 0xd8 // Start Of Image.
+	eoiMarker   = 0xd9 // End Of Image.
+	sof0Marker  = 0xc0 // Start Of Frame (Baseline).
+	sof2Marker  = 0xc2 // Start Of Frame (Progressive).
+	dhtMarker   = 0xc4 // Define Huffman Table.
+	dqtMarker   = 0xdb // Define Quantization Table.
+	sosMarker   = 0xda // Start Of Scan.
+	app0Marker  = 0xe0 // APPlication specific (0).
+	app15Marker = 0xef // APPlication specific (15).
+	comMarker   = 0xfe // COMment.
+)
+
+// Maps from the zig-zag ordering to the natural ordering.
+var unzig = [blockSize]int{
+	0, 1, 8, 16, 9, 2, 3, 10,
+	17, 24, 32, 25, 18, 11, 4, 5,
+	12, 19, 26, 33, 40, 48, 41, 34,
+	27, 20, 13, 6, 7, 14, 21, 28,
+	35, 42, 49, 56, 57, 50, 43, 36,
+	29, 22, 15, 23, 30, 37, 44, 51,
+	58, 59, 52, 45, 38, 31, 39, 46,
+	53, 60, 61, 54, 47, 55, 62, 63,
+}
+
+// If the passed in io.Reader does not also have ReadByte, then Decode will introduce its own buffering.
+type Reader interface {
+	io.Reader
+	ReadByte() (c byte, err os.Error)
+}
+
+type decoder struct {
+	r             Reader
+	width, height int
+	image         *image.RGBA
+	comps         [nComponent]component
+	huff          [maxTc + 1][maxTh + 1]huffman
+	quant         [maxTq + 1][blockSize]int
+	b             bits
+	blocks        [nComponent][maxH * maxV][blockSize]int
+	tmp           [1024]byte
+}
+
+// Reads and ignores the next n bytes.
+func (d *decoder) ignore(n int) os.Error {
+	for n > 0 {
+		m := len(d.tmp)
+		if m > n {
+			m = n
+		}
+		_, err := io.ReadFull(d.r, d.tmp[0:m])
+		if err != nil {
+			return err
+		}
+		n -= m
+	}
+	return nil
+}
+
+// Specified in section B.2.2.
+func (d *decoder) processSOF(n int) os.Error {
+	if n != 6+3*nComponent {
+		return UnsupportedError("SOF has wrong length")
+	}
+	_, err := io.ReadFull(d.r, d.tmp[0:6+3*nComponent])
+	if err != nil {
+		return err
+	}
+	// We only support 8-bit precision.
+	if d.tmp[0] != 8 {
+		return UnsupportedError("precision")
+	}
+	d.height = int(d.tmp[1])<<8 + int(d.tmp[2])
+	d.width = int(d.tmp[3])<<8 + int(d.tmp[4])
+	if d.tmp[5] != nComponent {
+		return UnsupportedError("SOF has wrong number of image components")
+	}
+	for i := 0; i < nComponent; i++ {
+		hv := d.tmp[7+3*i]
+		d.comps[i].c = d.tmp[6+3*i]
+		d.comps[i].h = hv >> 4
+		d.comps[i].v = hv & 0x0f
+		d.comps[i].tq = d.tmp[8+3*i]
+		// We only support YCbCr images, and 4:4:4, 4:2:2 or 4:2:0 chroma downsampling ratios. This implies that
+		// the (h, v) values for the Y component are either (1, 1), (2, 1) or (2, 2), and the
+		// (h, v) values for the Cr and Cb components must be (1, 1).
+		if i == 0 {
+			if hv != 0x11 && hv != 0x21 && hv != 0x22 {
+				return UnsupportedError("luma downsample ratio")
+			}
+		} else {
+			if hv != 0x11 {
+				return UnsupportedError("chroma downsample ratio")
+			}
+		}
+	}
+	d.image = image.NewRGBA(d.width, d.height)
+	return nil
+}
+
+// Specified in section B.2.4.1.
+func (d *decoder) processDQT(n int) os.Error {
+	const qtLength = 1 + blockSize
+	for ; n >= qtLength; n -= qtLength {
+		_, err := io.ReadFull(d.r, d.tmp[0:qtLength])
+		if err != nil {
+			return err
+		}
+		pq := d.tmp[0] >> 4
+		if pq != 0 {
+			return UnsupportedError("bad Pq value")
+		}
+		tq := d.tmp[0] & 0x0f
+		if tq > maxTq {
+			return FormatError("bad Tq value")
+		}
+		for i := range d.quant[tq] {
+			d.quant[tq][i] = int(d.tmp[i+1])
+		}
+	}
+	if n != 0 {
+		return FormatError("DQT has wrong length")
+	}
+	return nil
+}
+
+// Set the Pixel (px, py)'s RGB value, based on its YCbCr value.
+func (d *decoder) calcPixel(px, py, lumaBlock, lumaIndex, chromaIndex int) {
+	y, cb, cr := d.blocks[0][lumaBlock][lumaIndex], d.blocks[1][0][chromaIndex], d.blocks[2][0][chromaIndex]
+	// The JFIF specification (http://www.w3.org/Graphics/JPEG/jfif3.pdf, page 3) gives the formula
+	// for translating YCbCr to RGB as:
+	//   R = Y + 1.402 (Cr-128)
+	//   G = Y - 0.34414 (Cb-128) - 0.71414 (Cr-128)
+	//   B = Y + 1.772 (Cb-128)
+	yPlusHalf := 100000*y + 50000
+	cb -= 128
+	cr -= 128
+	r := (yPlusHalf + 140200*cr) / 100000
+	g := (yPlusHalf - 34414*cb - 71414*cr) / 100000
+	b := (yPlusHalf + 177200*cb) / 100000
+	if r < 0 {
+		r = 0
+	} else if r > 255 {
+		r = 255
+	}
+	if g < 0 {
+		g = 0
+	} else if g > 255 {
+		g = 255
+	}
+	if b < 0 {
+		b = 0
+	} else if b > 255 {
+		b = 255
+	}
+	d.image.Pixel[py][px] = image.RGBAColor{uint8(r), uint8(g), uint8(b), 0xff}
+}
+
+// Convert the MCU from YCbCr to RGB.
+func (d *decoder) convertMCU(mx, my, h0, v0 int) {
+	lumaBlock := 0
+	for v := 0; v < v0; v++ {
+		for h := 0; h < h0; h++ {
+			chromaBase := 8*4*v + 4*h
+			py := 8 * (v0*my + v)
+			for y := 0; y < 8 && py < d.height; y++ {
+				px := 8 * (h0*mx + h)
+				lumaIndex := 8 * y
+				chromaIndex := chromaBase + 8*(y/v0)
+				for x := 0; x < 8 && px < d.width; x++ {
+					d.calcPixel(px, py, lumaBlock, lumaIndex, chromaIndex)
+					if h0 == 1 {
+						chromaIndex += 1
+					} else {
+						chromaIndex += x % 2
+					}
+					lumaIndex++
+					px++
+				}
+				py++
+			}
+			lumaBlock++
+		}
+	}
+}
+
+// Specified in section B.2.3.
+func (d *decoder) processSOS(n int) os.Error {
+	if d.image == nil {
+		return FormatError("missing SOF segment")
+	}
+	if n != 4+2*nComponent {
+		return UnsupportedError("SOS has wrong length")
+	}
+	_, err := io.ReadFull(d.r, d.tmp[0:4+2*nComponent])
+	if err != nil {
+		return err
+	}
+	if d.tmp[0] != nComponent {
+		return UnsupportedError("SOS has wrong number of image components")
+	}
+	var scanComps [nComponent]struct {
+		td uint8 // DC table selector.
+		ta uint8 // AC table selector.
+	}
+	h0, v0 := int(d.comps[0].h), int(d.comps[0].v) // The h and v values from the Y components.
+	for i := 0; i < nComponent; i++ {
+		cs := d.tmp[1+2*i] // Component selector.
+		if cs != d.comps[i].c {
+			return UnsupportedError("scan components out of order")
+		}
+		scanComps[i].td = d.tmp[2+2*i] >> 4
+		scanComps[i].ta = d.tmp[2+2*i] & 0x0f
+	}
+	// mxx and myy are the number of MCUs (Minimum Coded Units) in the image.
+	mxx := (d.width + 8*int(h0) - 1) / (8 * int(h0))
+	myy := (d.height + 8*int(v0) - 1) / (8 * int(v0))
+
+	var allZeroes [blockSize]int
+	var dc [nComponent]int
+	for my := 0; my < myy; my++ {
+		for mx := 0; mx < mxx; mx++ {
+			for i := 0; i < nComponent; i++ {
+				qt := &d.quant[d.comps[i].tq]
+				for j := 0; j < int(d.comps[i].h*d.comps[i].v); j++ {
+					d.blocks[i][j] = allZeroes
+
+					// Decode the DC coefficient, as specified in section F.2.2.1.
+					value, err := d.decodeHuffman(&d.huff[dcTableClass][scanComps[i].td])
+					if err != nil {
+						return err
+					}
+					if value > 16 {
+						return UnsupportedError("excessive DC component")
+					}
+					dcDelta, err := d.receiveExtend(value)
+					if err != nil {
+						return err
+					}
+					dc[i] += dcDelta
+					d.blocks[i][j][0] = dc[i] * qt[0]
+
+					// Decode the AC coefficients, as specified in section F.2.2.2.
+					for k := 1; k < blockSize; k++ {
+						value, err := d.decodeHuffman(&d.huff[acTableClass][scanComps[i].ta])
+						if err != nil {
+							return err
+						}
+						v0 := value >> 4
+						v1 := value & 0x0f
+						if v1 != 0 {
+							k += int(v0)
+							if k > blockSize {
+								return FormatError("bad DCT index")
+							}
+							ac, err := d.receiveExtend(v1)
+							if err != nil {
+								return err
+							}
+							d.blocks[i][j][unzig[k]] = ac * qt[k]
+						} else {
+							if v0 != 0x0f {
+								break
+							}
+							k += 0x0f
+						}
+					}
+
+					idct(&d.blocks[i][j])
+				} // for j
+			} // for i
+			d.convertMCU(mx, my, int(d.comps[0].h), int(d.comps[0].v))
+		} // for mx
+	} // for my
+
+	return nil
+}
+
+// Decode reads a JPEG formatted image from r and returns it as an image.Image.
+func Decode(r io.Reader) (image.Image, os.Error) {
+	var d decoder
+	if rr, ok := r.(Reader); ok {
+		d.r = rr
+	} else {
+		d.r = bufio.NewReader(r)
+	}
+
+	// Check for the Start Of Image marker.
+	_, err := io.ReadFull(r, d.tmp[0:2])
+	if err != nil {
+		return nil, err
+	}
+	if d.tmp[0] != 0xff || d.tmp[1] != soiMarker {
+		return nil, FormatError("missing SOI marker")
+	}
+
+	// Process the remaining segments until the End Of Image marker.
+	for {
+		_, err := io.ReadFull(r, d.tmp[0:2])
+		if err != nil {
+			return nil, err
+		}
+		if d.tmp[0] != 0xff {
+			return nil, FormatError("missing 0xff marker start")
+		}
+		marker := d.tmp[1]
+		if marker == eoiMarker { // End Of Image.
+			break
+		}
+
+		// Read the 16-bit length of the segment. The value includes the 2 bytes for the
+		// length itself, so we subtract 2 to get the number of remaining bytes.
+		_, err = io.ReadFull(r, d.tmp[0:2])
+		if err != nil {
+			return nil, err
+		}
+		n := int(d.tmp[0])<<8 + int(d.tmp[1]) - 2
+		if n < 0 {
+			return nil, FormatError("short segment length")
+		}
+
+		switch {
+		case marker == sof0Marker: // Start Of Frame (Baseline).
+			err = d.processSOF(n)
+		case marker == sof2Marker: // Start Of Frame (Progressive).
+			err = UnsupportedError("progressive mode")
+		case marker == dhtMarker: // Define Huffman Table.
+			err = d.processDHT(n)
+		case marker == dqtMarker: // Define Quantization Table.
+			err = d.processDQT(n)
+		case marker == sosMarker: // Start Of Scan.
+			err = d.processSOS(n)
+		case marker >= app0Marker && marker <= app15Marker || marker == comMarker: // APPlication specific, or COMment.
+			err = d.ignore(n)
+		default:
+			err = UnsupportedError("unknown marker")
+		}
+		if err != nil {
+			return nil, err
+		}
+	}
+	return d.image, nil
+}