image/jpeg: change block from [64]int to [64]int32.

On 6g/linux: benchmark old ns/op new ns/op delta BenchmarkFDCT 4606 4241 -7.92% BenchmarkIDCT 4187 3923 -6.31% BenchmarkDecodeBaseline 3154864 3170224 +0.49% BenchmarkDecodeProgressive 4072812 4017132 -1.37% BenchmarkEncode 39406920 34596760 -12.21% Stack requirements before (from 'go tool 6g -S'): (scan.go:37) TEXT (*decoder).processSOS+0(SB),$1352-32 (writer.go:448) TEXT (*encoder).writeSOS+0(SB),$5344-24 after: (scan.go:37) TEXT (*decoder).processSOS+0(SB),$1064-32 (writer.go:448) TEXT (*encoder).writeSOS+0(SB),$2520-24 Also, in encoder.writeSOS, re-use the yBlock scratch buffer for Cb and Cr. This reduces the stack requirement slightly, but also avoids an unlucky coincidence where a BenchmarkEncode stack split lands between encoder.writeByte and bufio.Writer.WriteByte, which occurs very often during Huffman encoding and is otherwise disasterous for the final benchmark number. FWIW, the yBlock re-use *without* the s/int/int32/ change does not have a noticable effect on the benchmarks. R=r CC=golang-dev, rsc https://golang.org/cl/6823043
2024-11-20 05:44:44 -07:00 · 2012-10-30 11:10:08 +11:00 · 2012-10-30 11:10:08 +11:00 · daf43ba476
commit daf43ba476
parent 9c775353b9
6 changed files with 46 additions and 48 deletions
--- a/src/pkg/image/jpeg/dct_test.go
+++ b/src/pkg/image/jpeg/dct_test.go
@ -42,7 +42,7 @@ func TestDCT(t *testing.T) {
 		b := block{}
 		n := r.Int() % 64
 		for j := 0; j < n; j++ {
-			b[r.Int()%len(b)] = r.Int() % 256
+			b[r.Int()%len(b)] = r.Int31() % 256
 		}
 		blocks = append(blocks, b)
 	}
@ -144,9 +144,9 @@ func slowFDCT(b *block) {
 			dst[8*v+u] = sum / 8
 		}
 	}
-	// Convert from float64 to int.
+	// Convert from float64 to int32.
 	for i := range dst {
-		b[i] = int(dst[i] + 0.5)
+		b[i] = int32(dst[i] + 0.5)
 	}
 }

@ -174,9 +174,9 @@ func slowIDCT(b *block) {
 			dst[8*y+x] = sum / 8
 		}
 	}
-	// Convert from float64 to int.
+	// Convert from float64 to int32.
 	for i := range dst {
-		b[i] = int(dst[i] + 0.5)
+		b[i] = int32(dst[i] + 0.5)
 	}
 }

--- a/src/pkg/image/jpeg/huffman.go
+++ b/src/pkg/image/jpeg/huffman.go
@ -61,15 +61,15 @@ func (d *decoder) ensureNBits(n int) error {
 }

 // The composition of RECEIVE and EXTEND, specified in section F.2.2.1.
-func (d *decoder) receiveExtend(t uint8) (int, error) {
+func (d *decoder) receiveExtend(t uint8) (int32, error) {
 	err := d.ensureNBits(int(t))
 	if err != nil {
 		return 0, err
 	}
 	d.b.n -= int(t)
 	d.b.m >>= t
-	s := 1 << t
-	x := int(d.b.a>>uint8(d.b.n)) & (s - 1)
+	s := int32(1) << t
+	x := int32(d.b.a>>uint8(d.b.n)) & (s - 1)
 	if x < s>>1 {
 		x += ((-1) << t) + 1
 	}
--- a/src/pkg/image/jpeg/idct.go
+++ b/src/pkg/image/jpeg/idct.go
@ -39,7 +39,7 @@ package jpeg

 const blockSize = 64 // A DCT block is 8x8.

-type block [blockSize]int
+type block [blockSize]int32

 const (
 	w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16)
--- a/src/pkg/image/jpeg/reader.go
+++ b/src/pkg/image/jpeg/reader.go
@ -187,7 +187,7 @@ func (d *decoder) processDQT(n int) error {
 			return FormatError("bad Tq value")
 		}
 		for i := range d.quant[tq] {
-			d.quant[tq][i] = int(d.tmp[i+1])
+			d.quant[tq][i] = int32(d.tmp[i+1])
 		}
 	}
 	if n != 0 {
--- a/src/pkg/image/jpeg/scan.go
+++ b/src/pkg/image/jpeg/scan.go
@ -86,12 +86,12 @@ func (d *decoder) processSOS(n int) error {
 	// significant bit.
 	//
 	// For baseline JPEGs, these parameters are hard-coded to 0/63/0/0.
-	zigStart, zigEnd, ah, al := 0, blockSize-1, uint(0), uint(0)
+	zigStart, zigEnd, ah, al := int32(0), int32(blockSize-1), uint32(0), uint32(0)
 	if d.progressive {
-		zigStart = int(d.tmp[1+2*nComp])
-		zigEnd = int(d.tmp[2+2*nComp])
-		ah = uint(d.tmp[3+2*nComp] >> 4)
-		al = uint(d.tmp[3+2*nComp] & 0x0f)
+		zigStart = int32(d.tmp[1+2*nComp])
+		zigEnd = int32(d.tmp[2+2*nComp])
+		ah = uint32(d.tmp[3+2*nComp] >> 4)
+		al = uint32(d.tmp[3+2*nComp] & 0x0f)
 		if (zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || blockSize <= zigEnd {
 			return FormatError("bad spectral selection bounds")
 		}
@ -122,7 +122,7 @@ func (d *decoder) processSOS(n int) error {
 	var (
 		// b is the decoded coefficients, in natural (not zig-zag) order.
 		b  block
-		dc [nColorComponent]int
+		dc [nColorComponent]int32
 		// mx0 and my0 are the location of the current (in terms of 8x8 blocks).
 		// For example, with 4:2:0 chroma subsampling, the block whose top left
 		// pixel co-ordinates are (16, 8) is the third block in the first row:
@ -218,7 +218,7 @@ func (d *decoder) processSOS(n int) error {
 								val0 := value >> 4
 								val1 := value & 0x0f
 								if val1 != 0 {
-									zig += int(val0)
+									zig += int32(val0)
 									if zig > zigEnd {
 										break
 									}
@ -315,7 +315,7 @@ func (d *decoder) processSOS(n int) error {
 				// Reset the Huffman decoder.
 				d.b = bits{}
 				// Reset the DC components, as per section F.2.1.3.1.
-				dc = [nColorComponent]int{}
+				dc = [nColorComponent]int32{}
 				// Reset the progressive decoder state, as per section G.1.2.2.
 				d.eobRun = 0
 			}
@ -327,7 +327,7 @@ func (d *decoder) processSOS(n int) error {

 // refine decodes a successive approximation refinement block, as specified in
 // section G.1.2.
-func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) error {
+func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int32) error {
 	// Refining a DC component is trivial.
 	if zigStart == 0 {
 		if zigEnd != 0 {
@ -348,7 +348,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro
 	if d.eobRun == 0 {
 	loop:
 		for ; zig <= zigEnd; zig++ {
-			z := 0
+			z := int32(0)
 			value, err := d.decodeHuffman(h)
 			if err != nil {
 				return err
@ -382,7 +382,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro
 				return FormatError("unexpected Huffman code")
 			}

-			zig, err = d.refineNonZeroes(b, zig, zigEnd, int(val0), delta)
+			zig, err = d.refineNonZeroes(b, zig, zigEnd, int32(val0), delta)
 			if err != nil {
 				return err
 			}
@ -405,7 +405,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro

 // refineNonZeroes refines non-zero entries of b in zig-zag order. If nz >= 0,
 // the first nz zero entries are skipped over.
-func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int) (int, error) {
+func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int32) (int32, error) {
 	for ; zig <= zigEnd; zig++ {
 		u := unzig[zig]
 		if b[u] == 0 {
--- a/src/pkg/image/jpeg/writer.go
+++ b/src/pkg/image/jpeg/writer.go
@ -21,7 +21,7 @@ func min(x, y int) int {
 }

 // div returns a/b rounded to the nearest integer, instead of rounded to zero.
-func div(a int, b int) int {
+func div(a, b int32) int32 {
 	if a >= 0 {
 		return (a + (b >> 1)) / b
 	}
@ -268,14 +268,14 @@ func (e *encoder) emit(bits, nBits uint32) {
 }

 // emitHuff emits the given value with the given Huffman encoder.
-func (e *encoder) emitHuff(h huffIndex, value int) {
+func (e *encoder) emitHuff(h huffIndex, value int32) {
 	x := theHuffmanLUT[h][value]
 	e.emit(x&(1<<24-1), x>>24)
 }

 // emitHuffRLE emits a run of runLength copies of value encoded with the given
 // Huffman encoder.
-func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int) {
+func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int32) {
 	a, b := value, value
 	if a < 0 {
 		a, b = -value, value-1
@ -286,7 +286,7 @@ func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int) {
 	} else {
 		nBits = 8 + uint32(bitCount[a>>8])
 	}
-	e.emitHuff(h, runLength<<4|int(nBits))
+	e.emitHuff(h, runLength<<4|int32(nBits))
 	if nBits > 0 {
 		e.emit(uint32(b)&(1<<nBits-1), nBits)
 	}
@ -347,15 +347,15 @@ func (e *encoder) writeDHT() {
 // writeBlock writes a block of pixel data using the given quantization table,
 // returning the post-quantized DC value of the DCT-transformed block.
 // b is in natural (not zig-zag) order.
-func (e *encoder) writeBlock(b *block, q quantIndex, prevDC int) int {
+func (e *encoder) writeBlock(b *block, q quantIndex, prevDC int32) int32 {
 	fdct(b)
 	// Emit the DC delta.
-	dc := div(b[0], (8 * int(e.quant[q][0])))
+	dc := div(b[0], 8*int32(e.quant[q][0]))
 	e.emitHuffRLE(huffIndex(2*q+0), 0, dc-prevDC)
 	// Emit the AC components.
-	h, runLength := huffIndex(2*q+1), 0
+	h, runLength := huffIndex(2*q+1), int32(0)
 	for zig := 1; zig < blockSize; zig++ {
-		ac := div(b[unzig[zig]], (8 * int(e.quant[q][zig])))
+		ac := div(b[unzig[zig]], 8*int32(e.quant[q][zig]))
 		if ac == 0 {
 			runLength++
 		} else {
@ -383,9 +383,9 @@ func toYCbCr(m image.Image, p image.Point, yBlock, cbBlock, crBlock *block) {
 		for i := 0; i < 8; i++ {
 			r, g, b, _ := m.At(min(p.X+i, xmax), min(p.Y+j, ymax)).RGBA()
 			yy, cb, cr := color.RGBToYCbCr(uint8(r>>8), uint8(g>>8), uint8(b>>8))
-			yBlock[8*j+i] = int(yy)
-			cbBlock[8*j+i] = int(cb)
-			crBlock[8*j+i] = int(cr)
+			yBlock[8*j+i] = int32(yy)
+			cbBlock[8*j+i] = int32(cb)
+			crBlock[8*j+i] = int32(cr)
 		}
 	}
 }
@ -408,9 +408,9 @@ func rgbaToYCbCr(m *image.RGBA, p image.Point, yBlock, cbBlock, crBlock *block)
 			}
 			pix := m.Pix[offset+sx*4:]
 			yy, cb, cr := color.RGBToYCbCr(pix[0], pix[1], pix[2])
-			yBlock[8*j+i] = int(yy)
-			cbBlock[8*j+i] = int(cb)
-			crBlock[8*j+i] = int(cr)
+			yBlock[8*j+i] = int32(yy)
+			cbBlock[8*j+i] = int32(cb)
+			crBlock[8*j+i] = int32(cr)
 		}
 	}
 }
@ -450,12 +450,10 @@ func (e *encoder) writeSOS(m image.Image) {
 	var (
 		// Scratch buffers to hold the YCbCr values.
 		// The blocks are in natural (not zig-zag) order.
-		yBlock  block
-		cbBlock [4]block
-		crBlock [4]block
-		cBlock  block
+		b      block
+		cb, cr [4]block
 		// DC components are delta-encoded.
-		prevDCY, prevDCCb, prevDCCr int
+		prevDCY, prevDCCb, prevDCCr int32
 	)
 	bounds := m.Bounds()
 	rgba, _ := m.(*image.RGBA)
@ -466,16 +464,16 @@ func (e *encoder) writeSOS(m image.Image) {
 				yOff := (i & 2) * 4
 				p := image.Pt(x+xOff, y+yOff)
 				if rgba != nil {
-					rgbaToYCbCr(rgba, p, &yBlock, &cbBlock[i], &crBlock[i])
+					rgbaToYCbCr(rgba, p, &b, &cb[i], &cr[i])
 				} else {
-					toYCbCr(m, p, &yBlock, &cbBlock[i], &crBlock[i])
+					toYCbCr(m, p, &b, &cb[i], &cr[i])
 				}
-				prevDCY = e.writeBlock(&yBlock, 0, prevDCY)
+				prevDCY = e.writeBlock(&b, 0, prevDCY)
 			}
-			scale(&cBlock, &cbBlock)
-			prevDCCb = e.writeBlock(&cBlock, 1, prevDCCb)
-			scale(&cBlock, &crBlock)
-			prevDCCr = e.writeBlock(&cBlock, 1, prevDCCr)
+			scale(&b, &cb)
+			prevDCCb = e.writeBlock(&b, 1, prevDCCb)
+			scale(&b, &cr)
+			prevDCCr = e.writeBlock(&b, 1, prevDCCr)
 		}
 	}
 	// Pad the last byte with 1's.