diff --git a/src/pkg/image/jpeg/idct.go b/src/pkg/image/jpeg/idct.go index e5a2f40f5db..b387dfdffd1 100644 --- a/src/pkg/image/jpeg/idct.go +++ b/src/pkg/image/jpeg/idct.go @@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +package jpeg + // This is a Go translation of idct.c from // // http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/Video/verifier/mpeg2decode_960109.tar.gz @@ -35,8 +37,6 @@ * */ -package jpeg - const ( w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16) w2 = 2676 // 2048*sqrt(2)*cos(2*pi/16) @@ -55,41 +55,45 @@ const ( r2 = 181 // 256/sqrt(2) ) -// 2-D Inverse Discrete Cosine Transformation, followed by a +128 level shift. +// idct performs a 2-D Inverse Discrete Cosine Transformation, followed by a +// +128 level shift and a clip to [0, 255], writing the results to dst. +// stride is the number of elements between successive rows of dst. // -// The input coefficients should already have been multiplied by the appropriate quantization table. -// We use fixed-point computation, with the number of bits for the fractional component varying over the -// intermediate stages. The final values are expected to range within [0, 255], after a +128 level shift. +// The input coefficients should already have been multiplied by the +// appropriate quantization table. We use fixed-point computation, with the +// number of bits for the fractional component varying over the intermediate +// stages. // -// For more on the actual algorithm, see Z. Wang, "Fast algorithms for the discrete W transform and -// for the discrete Fourier transform", IEEE Trans. on ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984. -func idct(b *block) { +// For more on the actual algorithm, see Z. Wang, "Fast algorithms for the +// discrete W transform and for the discrete Fourier transform", IEEE Trans. on +// ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984. +func idct(dst []byte, stride int, src *block) { // Horizontal 1-D IDCT. for y := 0; y < 8; y++ { // If all the AC components are zero, then the IDCT is trivial. - if b[y*8+1] == 0 && b[y*8+2] == 0 && b[y*8+3] == 0 && - b[y*8+4] == 0 && b[y*8+5] == 0 && b[y*8+6] == 0 && b[y*8+7] == 0 { - dc := b[y*8+0] << 3 - b[y*8+0] = dc - b[y*8+1] = dc - b[y*8+2] = dc - b[y*8+3] = dc - b[y*8+4] = dc - b[y*8+5] = dc - b[y*8+6] = dc - b[y*8+7] = dc + if src[y*8+1] == 0 && src[y*8+2] == 0 && src[y*8+3] == 0 && + src[y*8+4] == 0 && src[y*8+5] == 0 && src[y*8+6] == 0 && src[y*8+7] == 0 { + dc := src[y*8+0] << 3 + src[y*8+0] = dc + src[y*8+1] = dc + src[y*8+2] = dc + src[y*8+3] = dc + src[y*8+4] = dc + src[y*8+5] = dc + src[y*8+6] = dc + src[y*8+7] = dc continue } // Prescale. - x0 := (b[y*8+0] << 11) + 128 - x1 := b[y*8+4] << 11 - x2 := b[y*8+6] - x3 := b[y*8+2] - x4 := b[y*8+1] - x5 := b[y*8+7] - x6 := b[y*8+5] - x7 := b[y*8+3] + x0 := (src[y*8+0] << 11) + 128 + x1 := src[y*8+4] << 11 + x2 := src[y*8+6] + x3 := src[y*8+2] + x4 := src[y*8+1] + x5 := src[y*8+7] + x6 := src[y*8+5] + x7 := src[y*8+3] // Stage 1. x8 := w7 * (x4 + x5) @@ -119,14 +123,14 @@ func idct(b *block) { x4 = (r2*(x4-x5) + 128) >> 8 // Stage 4. - b[8*y+0] = (x7 + x1) >> 8 - b[8*y+1] = (x3 + x2) >> 8 - b[8*y+2] = (x0 + x4) >> 8 - b[8*y+3] = (x8 + x6) >> 8 - b[8*y+4] = (x8 - x6) >> 8 - b[8*y+5] = (x0 - x4) >> 8 - b[8*y+6] = (x3 - x2) >> 8 - b[8*y+7] = (x7 - x1) >> 8 + src[8*y+0] = (x7 + x1) >> 8 + src[8*y+1] = (x3 + x2) >> 8 + src[8*y+2] = (x0 + x4) >> 8 + src[8*y+3] = (x8 + x6) >> 8 + src[8*y+4] = (x8 - x6) >> 8 + src[8*y+5] = (x0 - x4) >> 8 + src[8*y+6] = (x3 - x2) >> 8 + src[8*y+7] = (x7 - x1) >> 8 } // Vertical 1-D IDCT. @@ -136,14 +140,14 @@ func idct(b *block) { // we do not bother to check for the all-zero case. // Prescale. - y0 := (b[8*0+x] << 8) + 8192 - y1 := b[8*4+x] << 8 - y2 := b[8*6+x] - y3 := b[8*2+x] - y4 := b[8*1+x] - y5 := b[8*7+x] - y6 := b[8*5+x] - y7 := b[8*3+x] + y0 := (src[8*0+x] << 8) + 8192 + y1 := src[8*4+x] << 8 + y2 := src[8*6+x] + y3 := src[8*2+x] + y4 := src[8*1+x] + y5 := src[8*7+x] + y6 := src[8*5+x] + y7 := src[8*3+x] // Stage 1. y8 := w7*(y4+y5) + 4 @@ -173,18 +177,28 @@ func idct(b *block) { y4 = (r2*(y4-y5) + 128) >> 8 // Stage 4. - b[8*0+x] = (y7 + y1) >> 14 - b[8*1+x] = (y3 + y2) >> 14 - b[8*2+x] = (y0 + y4) >> 14 - b[8*3+x] = (y8 + y6) >> 14 - b[8*4+x] = (y8 - y6) >> 14 - b[8*5+x] = (y0 - y4) >> 14 - b[8*6+x] = (y3 - y2) >> 14 - b[8*7+x] = (y7 - y1) >> 14 + src[8*0+x] = (y7 + y1) >> 14 + src[8*1+x] = (y3 + y2) >> 14 + src[8*2+x] = (y0 + y4) >> 14 + src[8*3+x] = (y8 + y6) >> 14 + src[8*4+x] = (y8 - y6) >> 14 + src[8*5+x] = (y0 - y4) >> 14 + src[8*6+x] = (y3 - y2) >> 14 + src[8*7+x] = (y7 - y1) >> 14 } - // Level shift. - for i := range *b { - b[i] += 128 + // Level shift by +128, clip to [0, 255], and write to dst. + for y := 0; y < 8; y++ { + for x := 0; x < 8; x++ { + c := src[y*8+x] + if c < -128 { + c = 0 + } else if c > 127 { + c = 255 + } else { + c += 128 + } + dst[y*stride+x] = uint8(c) + } } } diff --git a/src/pkg/image/jpeg/reader.go b/src/pkg/image/jpeg/reader.go index 21a6fff9698..74df9ac4b76 100644 --- a/src/pkg/image/jpeg/reader.go +++ b/src/pkg/image/jpeg/reader.go @@ -96,7 +96,6 @@ type decoder struct { huff [maxTc + 1][maxTh + 1]huffman quant [maxTq + 1]block b bits - blocks [nComponent][maxH * maxV]block tmp [1024]byte } @@ -182,45 +181,6 @@ func (d *decoder) processDQT(n int) os.Error { return nil } -// Clip x to the range [0, 255] inclusive. -func clip(x int) uint8 { - if x < 0 { - return 0 - } - if x > 255 { - return 255 - } - return uint8(x) -} - -// Store the MCU to the image. -func (d *decoder) storeMCU(mx, my int) { - h0, v0 := d.comps[0].h, d.comps[0].v - // Store the luma blocks. - for v := 0; v < v0; v++ { - for h := 0; h < h0; h++ { - p := 8 * ((v0*my+v)*d.img.YStride + (h0*mx + h)) - for y := 0; y < 8; y++ { - for x := 0; x < 8; x++ { - d.img.Y[p] = clip(d.blocks[0][h0*v+h][8*y+x]) - p++ - } - p += d.img.YStride - 8 - } - } - } - // Store the chroma blocks. - p := 8 * (my*d.img.CStride + mx) - for y := 0; y < 8; y++ { - for x := 0; x < 8; x++ { - d.img.Cb[p] = clip(d.blocks[1][0][8*y+x]) - d.img.Cr[p] = clip(d.blocks[2][0][8*y+x]) - p++ - } - p += d.img.CStride - 8 - } -} - // Specified in section B.2.3. func (d *decoder) processSOS(n int) os.Error { if n != 4+2*nComponent { @@ -275,14 +235,18 @@ func (d *decoder) processSOS(n int) os.Error { } mcu, expectedRST := 0, uint8(rst0Marker) - var allZeroes block - var dc [nComponent]int + var ( + allZeroes, b block + dc [nComponent]int + ) for my := 0; my < myy; my++ { for mx := 0; mx < mxx; mx++ { for i := 0; i < nComponent; i++ { qt := &d.quant[d.comps[i].tq] for j := 0; j < d.comps[i].h*d.comps[i].v; j++ { - d.blocks[i][j] = allZeroes + // TODO(nigeltao): make this a "var b block" once the compiler's escape + // analysis is good enough to allocate it on the stack, not the heap. + b = allZeroes // Decode the DC coefficient, as specified in section F.2.2.1. value, err := d.decodeHuffman(&d.huff[dcTableClass][scanComps[i].td]) @@ -297,7 +261,7 @@ func (d *decoder) processSOS(n int) os.Error { return err } dc[i] += dcDelta - d.blocks[i][j][0] = dc[i] * qt[0] + b[0] = dc[i] * qt[0] // Decode the AC coefficients, as specified in section F.2.2.2. for k := 1; k < blockSize; k++ { @@ -316,7 +280,7 @@ func (d *decoder) processSOS(n int) os.Error { if err != nil { return err } - d.blocks[i][j][unzig[k]] = ac * qt[k] + b[unzig[k]] = ac * qt[k] } else { if val0 != 0x0f { break @@ -325,10 +289,19 @@ func (d *decoder) processSOS(n int) os.Error { } } - idct(&d.blocks[i][j]) + // Perform the inverse DCT and store the MCU component to the image. + switch i { + case 0: + mx0 := h0*mx + (j % 2) + my0 := v0*my + (j / 2) + idct(d.img.Y[8*(my0*d.img.YStride+mx0):], d.img.YStride, &b) + case 1: + idct(d.img.Cb[8*(my*d.img.CStride+mx):], d.img.CStride, &b) + case 2: + idct(d.img.Cr[8*(my*d.img.CStride+mx):], d.img.CStride, &b) + } } // for j } // for i - d.storeMCU(mx, my) mcu++ if d.ri > 0 && mcu%d.ri == 0 && mcu < mxx*myy { // A more sophisticated decoder could use RST[0-7] markers to resynchronize from corrupt input,