mirror of
https://github.com/golang/go
synced 2024-11-20 05:44:44 -07:00
image/jpeg: change block from [64]int to [64]int32.
On 6g/linux: benchmark old ns/op new ns/op delta BenchmarkFDCT 4606 4241 -7.92% BenchmarkIDCT 4187 3923 -6.31% BenchmarkDecodeBaseline 3154864 3170224 +0.49% BenchmarkDecodeProgressive 4072812 4017132 -1.37% BenchmarkEncode 39406920 34596760 -12.21% Stack requirements before (from 'go tool 6g -S'): (scan.go:37) TEXT (*decoder).processSOS+0(SB),$1352-32 (writer.go:448) TEXT (*encoder).writeSOS+0(SB),$5344-24 after: (scan.go:37) TEXT (*decoder).processSOS+0(SB),$1064-32 (writer.go:448) TEXT (*encoder).writeSOS+0(SB),$2520-24 Also, in encoder.writeSOS, re-use the yBlock scratch buffer for Cb and Cr. This reduces the stack requirement slightly, but also avoids an unlucky coincidence where a BenchmarkEncode stack split lands between encoder.writeByte and bufio.Writer.WriteByte, which occurs very often during Huffman encoding and is otherwise disasterous for the final benchmark number. FWIW, the yBlock re-use *without* the s/int/int32/ change does not have a noticable effect on the benchmarks. R=r CC=golang-dev, rsc https://golang.org/cl/6823043
This commit is contained in:
parent
9c775353b9
commit
daf43ba476
@ -42,7 +42,7 @@ func TestDCT(t *testing.T) {
|
||||
b := block{}
|
||||
n := r.Int() % 64
|
||||
for j := 0; j < n; j++ {
|
||||
b[r.Int()%len(b)] = r.Int() % 256
|
||||
b[r.Int()%len(b)] = r.Int31() % 256
|
||||
}
|
||||
blocks = append(blocks, b)
|
||||
}
|
||||
@ -144,9 +144,9 @@ func slowFDCT(b *block) {
|
||||
dst[8*v+u] = sum / 8
|
||||
}
|
||||
}
|
||||
// Convert from float64 to int.
|
||||
// Convert from float64 to int32.
|
||||
for i := range dst {
|
||||
b[i] = int(dst[i] + 0.5)
|
||||
b[i] = int32(dst[i] + 0.5)
|
||||
}
|
||||
}
|
||||
|
||||
@ -174,9 +174,9 @@ func slowIDCT(b *block) {
|
||||
dst[8*y+x] = sum / 8
|
||||
}
|
||||
}
|
||||
// Convert from float64 to int.
|
||||
// Convert from float64 to int32.
|
||||
for i := range dst {
|
||||
b[i] = int(dst[i] + 0.5)
|
||||
b[i] = int32(dst[i] + 0.5)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -61,15 +61,15 @@ func (d *decoder) ensureNBits(n int) error {
|
||||
}
|
||||
|
||||
// The composition of RECEIVE and EXTEND, specified in section F.2.2.1.
|
||||
func (d *decoder) receiveExtend(t uint8) (int, error) {
|
||||
func (d *decoder) receiveExtend(t uint8) (int32, error) {
|
||||
err := d.ensureNBits(int(t))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
d.b.n -= int(t)
|
||||
d.b.m >>= t
|
||||
s := 1 << t
|
||||
x := int(d.b.a>>uint8(d.b.n)) & (s - 1)
|
||||
s := int32(1) << t
|
||||
x := int32(d.b.a>>uint8(d.b.n)) & (s - 1)
|
||||
if x < s>>1 {
|
||||
x += ((-1) << t) + 1
|
||||
}
|
||||
|
@ -39,7 +39,7 @@ package jpeg
|
||||
|
||||
const blockSize = 64 // A DCT block is 8x8.
|
||||
|
||||
type block [blockSize]int
|
||||
type block [blockSize]int32
|
||||
|
||||
const (
|
||||
w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16)
|
||||
|
@ -187,7 +187,7 @@ func (d *decoder) processDQT(n int) error {
|
||||
return FormatError("bad Tq value")
|
||||
}
|
||||
for i := range d.quant[tq] {
|
||||
d.quant[tq][i] = int(d.tmp[i+1])
|
||||
d.quant[tq][i] = int32(d.tmp[i+1])
|
||||
}
|
||||
}
|
||||
if n != 0 {
|
||||
|
@ -86,12 +86,12 @@ func (d *decoder) processSOS(n int) error {
|
||||
// significant bit.
|
||||
//
|
||||
// For baseline JPEGs, these parameters are hard-coded to 0/63/0/0.
|
||||
zigStart, zigEnd, ah, al := 0, blockSize-1, uint(0), uint(0)
|
||||
zigStart, zigEnd, ah, al := int32(0), int32(blockSize-1), uint32(0), uint32(0)
|
||||
if d.progressive {
|
||||
zigStart = int(d.tmp[1+2*nComp])
|
||||
zigEnd = int(d.tmp[2+2*nComp])
|
||||
ah = uint(d.tmp[3+2*nComp] >> 4)
|
||||
al = uint(d.tmp[3+2*nComp] & 0x0f)
|
||||
zigStart = int32(d.tmp[1+2*nComp])
|
||||
zigEnd = int32(d.tmp[2+2*nComp])
|
||||
ah = uint32(d.tmp[3+2*nComp] >> 4)
|
||||
al = uint32(d.tmp[3+2*nComp] & 0x0f)
|
||||
if (zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || blockSize <= zigEnd {
|
||||
return FormatError("bad spectral selection bounds")
|
||||
}
|
||||
@ -122,7 +122,7 @@ func (d *decoder) processSOS(n int) error {
|
||||
var (
|
||||
// b is the decoded coefficients, in natural (not zig-zag) order.
|
||||
b block
|
||||
dc [nColorComponent]int
|
||||
dc [nColorComponent]int32
|
||||
// mx0 and my0 are the location of the current (in terms of 8x8 blocks).
|
||||
// For example, with 4:2:0 chroma subsampling, the block whose top left
|
||||
// pixel co-ordinates are (16, 8) is the third block in the first row:
|
||||
@ -218,7 +218,7 @@ func (d *decoder) processSOS(n int) error {
|
||||
val0 := value >> 4
|
||||
val1 := value & 0x0f
|
||||
if val1 != 0 {
|
||||
zig += int(val0)
|
||||
zig += int32(val0)
|
||||
if zig > zigEnd {
|
||||
break
|
||||
}
|
||||
@ -315,7 +315,7 @@ func (d *decoder) processSOS(n int) error {
|
||||
// Reset the Huffman decoder.
|
||||
d.b = bits{}
|
||||
// Reset the DC components, as per section F.2.1.3.1.
|
||||
dc = [nColorComponent]int{}
|
||||
dc = [nColorComponent]int32{}
|
||||
// Reset the progressive decoder state, as per section G.1.2.2.
|
||||
d.eobRun = 0
|
||||
}
|
||||
@ -327,7 +327,7 @@ func (d *decoder) processSOS(n int) error {
|
||||
|
||||
// refine decodes a successive approximation refinement block, as specified in
|
||||
// section G.1.2.
|
||||
func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) error {
|
||||
func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int32) error {
|
||||
// Refining a DC component is trivial.
|
||||
if zigStart == 0 {
|
||||
if zigEnd != 0 {
|
||||
@ -348,7 +348,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro
|
||||
if d.eobRun == 0 {
|
||||
loop:
|
||||
for ; zig <= zigEnd; zig++ {
|
||||
z := 0
|
||||
z := int32(0)
|
||||
value, err := d.decodeHuffman(h)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -382,7 +382,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro
|
||||
return FormatError("unexpected Huffman code")
|
||||
}
|
||||
|
||||
zig, err = d.refineNonZeroes(b, zig, zigEnd, int(val0), delta)
|
||||
zig, err = d.refineNonZeroes(b, zig, zigEnd, int32(val0), delta)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -405,7 +405,7 @@ func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int) erro
|
||||
|
||||
// refineNonZeroes refines non-zero entries of b in zig-zag order. If nz >= 0,
|
||||
// the first nz zero entries are skipped over.
|
||||
func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int) (int, error) {
|
||||
func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int32) (int32, error) {
|
||||
for ; zig <= zigEnd; zig++ {
|
||||
u := unzig[zig]
|
||||
if b[u] == 0 {
|
||||
|
@ -21,7 +21,7 @@ func min(x, y int) int {
|
||||
}
|
||||
|
||||
// div returns a/b rounded to the nearest integer, instead of rounded to zero.
|
||||
func div(a int, b int) int {
|
||||
func div(a, b int32) int32 {
|
||||
if a >= 0 {
|
||||
return (a + (b >> 1)) / b
|
||||
}
|
||||
@ -268,14 +268,14 @@ func (e *encoder) emit(bits, nBits uint32) {
|
||||
}
|
||||
|
||||
// emitHuff emits the given value with the given Huffman encoder.
|
||||
func (e *encoder) emitHuff(h huffIndex, value int) {
|
||||
func (e *encoder) emitHuff(h huffIndex, value int32) {
|
||||
x := theHuffmanLUT[h][value]
|
||||
e.emit(x&(1<<24-1), x>>24)
|
||||
}
|
||||
|
||||
// emitHuffRLE emits a run of runLength copies of value encoded with the given
|
||||
// Huffman encoder.
|
||||
func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int) {
|
||||
func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int32) {
|
||||
a, b := value, value
|
||||
if a < 0 {
|
||||
a, b = -value, value-1
|
||||
@ -286,7 +286,7 @@ func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int) {
|
||||
} else {
|
||||
nBits = 8 + uint32(bitCount[a>>8])
|
||||
}
|
||||
e.emitHuff(h, runLength<<4|int(nBits))
|
||||
e.emitHuff(h, runLength<<4|int32(nBits))
|
||||
if nBits > 0 {
|
||||
e.emit(uint32(b)&(1<<nBits-1), nBits)
|
||||
}
|
||||
@ -347,15 +347,15 @@ func (e *encoder) writeDHT() {
|
||||
// writeBlock writes a block of pixel data using the given quantization table,
|
||||
// returning the post-quantized DC value of the DCT-transformed block.
|
||||
// b is in natural (not zig-zag) order.
|
||||
func (e *encoder) writeBlock(b *block, q quantIndex, prevDC int) int {
|
||||
func (e *encoder) writeBlock(b *block, q quantIndex, prevDC int32) int32 {
|
||||
fdct(b)
|
||||
// Emit the DC delta.
|
||||
dc := div(b[0], (8 * int(e.quant[q][0])))
|
||||
dc := div(b[0], 8*int32(e.quant[q][0]))
|
||||
e.emitHuffRLE(huffIndex(2*q+0), 0, dc-prevDC)
|
||||
// Emit the AC components.
|
||||
h, runLength := huffIndex(2*q+1), 0
|
||||
h, runLength := huffIndex(2*q+1), int32(0)
|
||||
for zig := 1; zig < blockSize; zig++ {
|
||||
ac := div(b[unzig[zig]], (8 * int(e.quant[q][zig])))
|
||||
ac := div(b[unzig[zig]], 8*int32(e.quant[q][zig]))
|
||||
if ac == 0 {
|
||||
runLength++
|
||||
} else {
|
||||
@ -383,9 +383,9 @@ func toYCbCr(m image.Image, p image.Point, yBlock, cbBlock, crBlock *block) {
|
||||
for i := 0; i < 8; i++ {
|
||||
r, g, b, _ := m.At(min(p.X+i, xmax), min(p.Y+j, ymax)).RGBA()
|
||||
yy, cb, cr := color.RGBToYCbCr(uint8(r>>8), uint8(g>>8), uint8(b>>8))
|
||||
yBlock[8*j+i] = int(yy)
|
||||
cbBlock[8*j+i] = int(cb)
|
||||
crBlock[8*j+i] = int(cr)
|
||||
yBlock[8*j+i] = int32(yy)
|
||||
cbBlock[8*j+i] = int32(cb)
|
||||
crBlock[8*j+i] = int32(cr)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -408,9 +408,9 @@ func rgbaToYCbCr(m *image.RGBA, p image.Point, yBlock, cbBlock, crBlock *block)
|
||||
}
|
||||
pix := m.Pix[offset+sx*4:]
|
||||
yy, cb, cr := color.RGBToYCbCr(pix[0], pix[1], pix[2])
|
||||
yBlock[8*j+i] = int(yy)
|
||||
cbBlock[8*j+i] = int(cb)
|
||||
crBlock[8*j+i] = int(cr)
|
||||
yBlock[8*j+i] = int32(yy)
|
||||
cbBlock[8*j+i] = int32(cb)
|
||||
crBlock[8*j+i] = int32(cr)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -450,12 +450,10 @@ func (e *encoder) writeSOS(m image.Image) {
|
||||
var (
|
||||
// Scratch buffers to hold the YCbCr values.
|
||||
// The blocks are in natural (not zig-zag) order.
|
||||
yBlock block
|
||||
cbBlock [4]block
|
||||
crBlock [4]block
|
||||
cBlock block
|
||||
b block
|
||||
cb, cr [4]block
|
||||
// DC components are delta-encoded.
|
||||
prevDCY, prevDCCb, prevDCCr int
|
||||
prevDCY, prevDCCb, prevDCCr int32
|
||||
)
|
||||
bounds := m.Bounds()
|
||||
rgba, _ := m.(*image.RGBA)
|
||||
@ -466,16 +464,16 @@ func (e *encoder) writeSOS(m image.Image) {
|
||||
yOff := (i & 2) * 4
|
||||
p := image.Pt(x+xOff, y+yOff)
|
||||
if rgba != nil {
|
||||
rgbaToYCbCr(rgba, p, &yBlock, &cbBlock[i], &crBlock[i])
|
||||
rgbaToYCbCr(rgba, p, &b, &cb[i], &cr[i])
|
||||
} else {
|
||||
toYCbCr(m, p, &yBlock, &cbBlock[i], &crBlock[i])
|
||||
toYCbCr(m, p, &b, &cb[i], &cr[i])
|
||||
}
|
||||
prevDCY = e.writeBlock(&yBlock, 0, prevDCY)
|
||||
prevDCY = e.writeBlock(&b, 0, prevDCY)
|
||||
}
|
||||
scale(&cBlock, &cbBlock)
|
||||
prevDCCb = e.writeBlock(&cBlock, 1, prevDCCb)
|
||||
scale(&cBlock, &crBlock)
|
||||
prevDCCr = e.writeBlock(&cBlock, 1, prevDCCr)
|
||||
scale(&b, &cb)
|
||||
prevDCCb = e.writeBlock(&b, 1, prevDCCb)
|
||||
scale(&b, &cr)
|
||||
prevDCCr = e.writeBlock(&b, 1, prevDCCr)
|
||||
}
|
||||
}
|
||||
// Pad the last byte with 1's.
|
||||
|
Loading…
Reference in New Issue
Block a user