mirror of
https://github.com/golang/go
synced 2024-11-12 00:40:23 -07:00
image/color: optimize YCbCrToRGB
Use one comparison to detect underflow and overflow simultaneously.
Use a shift, bitwise complement and uint8 type conversion to handle
clamping to upper and lower bound without additional branching.

Overall the new code is faster for a mix of common case, underflow
and overflow.

name     old time/op  new time/op  delta
YCbCr-2  1.12ms ± 0%  0.64ms ± 0%  -43.01%  (p=0.000 n=48+47)

name              old time/op  new time/op  delta
YCbCrToRGB/0-2    5.52ns ± 0%  5.77ns ± 0%  +4.48%  (p=0.000 n=50+49)
YCbCrToRGB/128-2  6.05ns ± 0%  5.52ns ± 0%  -8.69%  (p=0.000 n=39+50)
YCbCrToRGB/255-2  5.80ns ± 0%  5.77ns ± 0%  -0.58%  (p=0.000 n=50+49)

Found in collaboration with Josh Bleecher Snyder and Ralph Corderoy.

Change-Id: Ic5020320f704966f545fdc1ae6bc24ddb5d3d09a
Reviewed-on: https://go-review.googlesource.com/21910
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
1650ced98f
commit
f0c5b8b9c9
@@ -44,27 +44,44 @@ func YCbCrToRGB(y, cb, cr uint8) (uint8, uint8, uint8) {
|
||||
// B = Y' + 1.77200*(Cb-128)
|
||||
// http://www.w3.org/Graphics/JPEG/jfif3.pdf says Y but means Y'.
|
||||
|
||||
yy1 := int32(y) * 0x10100 // Convert 0x12 to 0x121200.
|
||||
yy1 := int32(y) * 0x010100 // Convert 0x12 to 0x121200.
|
||||
cb1 := int32(cb) - 128
|
||||
cr1 := int32(cr) - 128
|
||||
r := (yy1 + 91881*cr1) >> 16
|
||||
g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
|
||||
b := (yy1 + 116130*cb1) >> 16
|
||||
if r < 0 {
|
||||
r = 0
|
||||
} else if r > 0xff {
|
||||
r = 0xff
|
||||
|
||||
// The bit twiddling below is equivalent to
|
||||
//
|
||||
// r := (yy1 + 91881*cr1) >> 16
|
||||
// if r < 0 {
|
||||
// r = 0
|
||||
// } else if r > 0xff {
|
||||
// r = ^int32(0)
|
||||
// }
|
||||
//
|
||||
// but uses fewer branches and is faster.
|
||||
// Note that the uint8 type conversion in the return
|
||||
// statement will convert ^int32(0) to 0xff.
|
||||
// The code below to compute b and g uses a similar pattern.
|
||||
r := yy1 + 91881*cr1
|
||||
if uint32(r)&0xff000000 == 0 {
|
||||
r >>= 16
|
||||
} else {
|
||||
r = ^(r >> 31)
|
||||
}
|
||||
if g < 0 {
|
||||
g = 0
|
||||
} else if g > 0xff {
|
||||
g = 0xff
|
||||
|
||||
b := yy1 + 116130*cb1
|
||||
if uint32(b)&0xff000000 == 0 {
|
||||
b >>= 16
|
||||
} else {
|
||||
b = ^(b >> 31)
|
||||
}
|
||||
if b < 0 {
|
||||
b = 0
|
||||
} else if b > 0xff {
|
||||
b = 0xff
|
||||
|
||||
g := yy1 - 22554*cb1 - 46802*cr1
|
||||
if uint32(g)&0xff000000 == 0 {
|
||||
g >>= 16
|
||||
} else {
|
||||
g = ^(g >> 31)
|
||||
}
|
||||
|
||||
return uint8(r), uint8(g), uint8(b)
|
||||
}
|
||||
|
||||
|
@@ -95,26 +95,42 @@ const sratioCase = `
|
||||
%s
|
||||
|
||||
// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
|
||||
yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
|
||||
yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
|
||||
cb1 := int32(src.Cb[ci]) - 128
|
||||
cr1 := int32(src.Cr[ci]) - 128
|
||||
r := (yy1 + 91881*cr1) >> 16
|
||||
g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
|
||||
b := (yy1 + 116130*cb1) >> 16
|
||||
if r < 0 {
|
||||
r = 0
|
||||
} else if r > 255 {
|
||||
r = 255
|
||||
|
||||
// The bit twiddling below is equivalent to
|
||||
//
|
||||
// r := (yy1 + 91881*cr1) >> 16
|
||||
// if r < 0 {
|
||||
// r = 0
|
||||
// } else if r > 0xff {
|
||||
// r = ^int32(0)
|
||||
// }
|
||||
//
|
||||
// but uses fewer branches and is faster.
|
||||
// Note that the uint8 type conversion in the return
|
||||
// statement will convert ^int32(0) to 0xff.
|
||||
// The code below to compute b and g uses a similar pattern.
|
||||
r := yy1 + 91881*cr1
|
||||
if uint32(r)&0xff000000 == 0 {
|
||||
r >>= 16
|
||||
} else {
|
||||
r = ^(r >> 31)
|
||||
}
|
||||
if g < 0 {
|
||||
g = 0
|
||||
} else if g > 255 {
|
||||
g = 255
|
||||
|
||||
b := yy1 + 116130*cb1
|
||||
if uint32(b)&0xff000000 == 0 {
|
||||
b >>= 16
|
||||
} else {
|
||||
b = ^(b >> 31)
|
||||
}
|
||||
if b < 0 {
|
||||
b = 0
|
||||
} else if b > 255 {
|
||||
b = 255
|
||||
|
||||
g := yy1 - 22554*cb1 - 46802*cr1
|
||||
if uint32(g)&0xff000000 == 0 {
|
||||
g >>= 16
|
||||
} else {
|
||||
g = ^(g >> 31)
|
||||
}
|
||||
|
||||
dpix[x+0] = uint8(r)
|
||||
|
@@ -44,26 +44,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
|
||||
for x := x0; x != x1; x, yi, ci = x+4, yi+1, ci+1 {
|
||||
|
||||
// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
|
||||
yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
|
||||
yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
|
||||
cb1 := int32(src.Cb[ci]) - 128
|
||||
cr1 := int32(src.Cr[ci]) - 128
|
||||
r := (yy1 + 91881*cr1) >> 16
|
||||
g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
|
||||
b := (yy1 + 116130*cb1) >> 16
|
||||
if r < 0 {
|
||||
r = 0
|
||||
} else if r > 255 {
|
||||
r = 255
|
||||
|
||||
// The bit twiddling below is equivalent to
|
||||
//
|
||||
// r := (yy1 + 91881*cr1) >> 16
|
||||
// if r < 0 {
|
||||
// r = 0
|
||||
// } else if r > 0xff {
|
||||
// r = ^int32(0)
|
||||
// }
|
||||
//
|
||||
// but uses fewer branches and is faster.
|
||||
// Note that the uint8 type conversion in the return
|
||||
// statement will convert ^int32(0) to 0xff.
|
||||
// The code below to compute b and g uses a similar pattern.
|
||||
r := yy1 + 91881*cr1
|
||||
if uint32(r)&0xff000000 == 0 {
|
||||
r >>= 16
|
||||
} else {
|
||||
r = ^(r >> 31)
|
||||
}
|
||||
if g < 0 {
|
||||
g = 0
|
||||
} else if g > 255 {
|
||||
g = 255
|
||||
|
||||
b := yy1 + 116130*cb1
|
||||
if uint32(b)&0xff000000 == 0 {
|
||||
b >>= 16
|
||||
} else {
|
||||
b = ^(b >> 31)
|
||||
}
|
||||
if b < 0 {
|
||||
b = 0
|
||||
} else if b > 255 {
|
||||
b = 255
|
||||
|
||||
g := yy1 - 22554*cb1 - 46802*cr1
|
||||
if uint32(g)&0xff000000 == 0 {
|
||||
g >>= 16
|
||||
} else {
|
||||
g = ^(g >> 31)
|
||||
}
|
||||
|
||||
dpix[x+0] = uint8(r)
|
||||
@@ -83,26 +99,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
|
||||
ci := ciBase + sx/2
|
||||
|
||||
// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
|
||||
yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
|
||||
yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
|
||||
cb1 := int32(src.Cb[ci]) - 128
|
||||
cr1 := int32(src.Cr[ci]) - 128
|
||||
r := (yy1 + 91881*cr1) >> 16
|
||||
g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
|
||||
b := (yy1 + 116130*cb1) >> 16
|
||||
if r < 0 {
|
||||
r = 0
|
||||
} else if r > 255 {
|
||||
r = 255
|
||||
|
||||
// The bit twiddling below is equivalent to
|
||||
//
|
||||
// r := (yy1 + 91881*cr1) >> 16
|
||||
// if r < 0 {
|
||||
// r = 0
|
||||
// } else if r > 0xff {
|
||||
// r = ^int32(0)
|
||||
// }
|
||||
//
|
||||
// but uses fewer branches and is faster.
|
||||
// Note that the uint8 type conversion in the return
|
||||
// statement will convert ^int32(0) to 0xff.
|
||||
// The code below to compute b and g uses a similar pattern.
|
||||
r := yy1 + 91881*cr1
|
||||
if uint32(r)&0xff000000 == 0 {
|
||||
r >>= 16
|
||||
} else {
|
||||
r = ^(r >> 31)
|
||||
}
|
||||
if g < 0 {
|
||||
g = 0
|
||||
} else if g > 255 {
|
||||
g = 255
|
||||
|
||||
b := yy1 + 116130*cb1
|
||||
if uint32(b)&0xff000000 == 0 {
|
||||
b >>= 16
|
||||
} else {
|
||||
b = ^(b >> 31)
|
||||
}
|
||||
if b < 0 {
|
||||
b = 0
|
||||
} else if b > 255 {
|
||||
b = 255
|
||||
|
||||
g := yy1 - 22554*cb1 - 46802*cr1
|
||||
if uint32(g)&0xff000000 == 0 {
|
||||
g >>= 16
|
||||
} else {
|
||||
g = ^(g >> 31)
|
||||
}
|
||||
|
||||
dpix[x+0] = uint8(r)
|
||||
@@ -122,26 +154,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
|
||||
ci := ciBase + sx/2
|
||||
|
||||
// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
|
||||
yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
|
||||
yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
|
||||
cb1 := int32(src.Cb[ci]) - 128
|
||||
cr1 := int32(src.Cr[ci]) - 128
|
||||
r := (yy1 + 91881*cr1) >> 16
|
||||
g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
|
||||
b := (yy1 + 116130*cb1) >> 16
|
||||
if r < 0 {
|
||||
r = 0
|
||||
} else if r > 255 {
|
||||
r = 255
|
||||
|
||||
// The bit twiddling below is equivalent to
|
||||
//
|
||||
// r := (yy1 + 91881*cr1) >> 16
|
||||
// if r < 0 {
|
||||
// r = 0
|
||||
// } else if r > 0xff {
|
||||
// r = ^int32(0)
|
||||
// }
|
||||
//
|
||||
// but uses fewer branches and is faster.
|
||||
// Note that the uint8 type conversion in the return
|
||||
// statement will convert ^int32(0) to 0xff.
|
||||
// The code below to compute b and g uses a similar pattern.
|
||||
r := yy1 + 91881*cr1
|
||||
if uint32(r)&0xff000000 == 0 {
|
||||
r >>= 16
|
||||
} else {
|
||||
r = ^(r >> 31)
|
||||
}
|
||||
if g < 0 {
|
||||
g = 0
|
||||
} else if g > 255 {
|
||||
g = 255
|
||||
|
||||
b := yy1 + 116130*cb1
|
||||
if uint32(b)&0xff000000 == 0 {
|
||||
b >>= 16
|
||||
} else {
|
||||
b = ^(b >> 31)
|
||||
}
|
||||
if b < 0 {
|
||||
b = 0
|
||||
} else if b > 255 {
|
||||
b = 255
|
||||
|
||||
g := yy1 - 22554*cb1 - 46802*cr1
|
||||
if uint32(g)&0xff000000 == 0 {
|
||||
g >>= 16
|
||||
} else {
|
||||
g = ^(g >> 31)
|
||||
}
|
||||
|
||||
dpix[x+0] = uint8(r)
|
||||
@@ -160,26 +208,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
|
||||
for x := x0; x != x1; x, yi, ci = x+4, yi+1, ci+1 {
|
||||
|
||||
// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
|
||||
yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
|
||||
yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
|
||||
cb1 := int32(src.Cb[ci]) - 128
|
||||
cr1 := int32(src.Cr[ci]) - 128
|
||||
r := (yy1 + 91881*cr1) >> 16
|
||||
g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
|
||||
b := (yy1 + 116130*cb1) >> 16
|
||||
if r < 0 {
|
||||
r = 0
|
||||
} else if r > 255 {
|
||||
r = 255
|
||||
|
||||
// The bit twiddling below is equivalent to
|
||||
//
|
||||
// r := (yy1 + 91881*cr1) >> 16
|
||||
// if r < 0 {
|
||||
// r = 0
|
||||
// } else if r > 0xff {
|
||||
// r = ^int32(0)
|
||||
// }
|
||||
//
|
||||
// but uses fewer branches and is faster.
|
||||
// Note that the uint8 type conversion in the return
|
||||
// statement will convert ^int32(0) to 0xff.
|
||||
// The code below to compute b and g uses a similar pattern.
|
||||
r := yy1 + 91881*cr1
|
||||
if uint32(r)&0xff000000 == 0 {
|
||||
r >>= 16
|
||||
} else {
|
||||
r = ^(r >> 31)
|
||||
}
|
||||
if g < 0 {
|
||||
g = 0
|
||||
} else if g > 255 {
|
||||
g = 255
|
||||
|
||||
b := yy1 + 116130*cb1
|
||||
if uint32(b)&0xff000000 == 0 {
|
||||
b >>= 16
|
||||
} else {
|
||||
b = ^(b >> 31)
|
||||
}
|
||||
if b < 0 {
|
||||
b = 0
|
||||
} else if b > 255 {
|
||||
b = 255
|
||||
|
||||
g := yy1 - 22554*cb1 - 46802*cr1
|
||||
if uint32(g)&0xff000000 == 0 {
|
||||
g >>= 16
|
||||
} else {
|
||||
g = ^(g >> 31)
|
||||
}
|
||||
|
||||
dpix[x+0] = uint8(r)
|
||||
|
Loading…
Reference in New Issue
Block a user