image/color: optimize YCbCrToRGB

Use one comparison to detect underflow and overflow simultaneously.
Use a shift, bitwise complement and uint8 type conversion to handle
clamping to upper and lower bound without additional branching.

Overall the new code is faster for a mix of
common case, underflow and overflow.

name     old time/op  new time/op  delta
YCbCr-2  1.12ms ± 0%  0.64ms ± 0%  -43.01%  (p=0.000 n=48+47)

name              old time/op  new time/op  delta
YCbCrToRGB/0-2    5.52ns ± 0%  5.77ns ± 0%  +4.48%  (p=0.000 n=50+49)
YCbCrToRGB/128-2  6.05ns ± 0%  5.52ns ± 0%  -8.69%  (p=0.000 n=39+50)
YCbCrToRGB/255-2  5.80ns ± 0%  5.77ns ± 0%  -0.58%  (p=0.000 n=50+49)

Found in collaboration with Josh Bleecher Snyder and Ralph Corderoy.

Change-Id: Ic5020320f704966f545fdc1ae6bc24ddb5d3d09a
Reviewed-on: https://go-review.googlesource.com/21910
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2024-11-12 00:40:23 -07:00 · 2016-04-12 21:16:27 +02:00 · 2016-04-12 21:16:27 +02:00 · f0c5b8b9c9
commit f0c5b8b9c9
parent 1650ced98f
3 changed files with 193 additions and 96 deletions
--- a/src/image/color/ycbcr.go
+++ b/src/image/color/ycbcr.go
@ -44,27 +44,44 @@ func YCbCrToRGB(y, cb, cr uint8) (uint8, uint8, uint8) {
 	//	B = Y' + 1.77200*(Cb-128)
 	// http://www.w3.org/Graphics/JPEG/jfif3.pdf says Y but means Y'.

-	yy1 := int32(y) * 0x10100 // Convert 0x12 to 0x121200.
+	yy1 := int32(y) * 0x010100 // Convert 0x12 to 0x121200.
 	cb1 := int32(cb) - 128
 	cr1 := int32(cr) - 128
-	r := (yy1 + 91881*cr1) >> 16
-	g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
-	b := (yy1 + 116130*cb1) >> 16
-	if r < 0 {
-		r = 0
-	} else if r > 0xff {
-		r = 0xff
+
+	// The bit twiddling below is equivalent to
+	//
+	// r := (yy1 + 91881*cr1) >> 16
+	// if r < 0 {
+	//     r = 0
+	// } else if r > 0xff {
+	//     r = ^int32(0)
+	// }
+	//
+	// but uses fewer branches and is faster.
+	// Note that the uint8 type conversion in the return
+	// statement will convert ^int32(0) to 0xff.
+	// The code below to compute b and g uses a similar pattern.
+	r := yy1 + 91881*cr1
+	if uint32(r)&0xff000000 == 0 {
+		r >>= 16
+	} else {
+		r = ^(r >> 31)
 	}
-	if g < 0 {
-		g = 0
-	} else if g > 0xff {
-		g = 0xff
+
+	b := yy1 + 116130*cb1
+	if uint32(b)&0xff000000 == 0 {
+		b >>= 16
+	} else {
+		b = ^(b >> 31)
 	}
-	if b < 0 {
-		b = 0
-	} else if b > 0xff {
-		b = 0xff
+
+	g := yy1 - 22554*cb1 - 46802*cr1
+	if uint32(g)&0xff000000 == 0 {
+		g >>= 16
+	} else {
+		g = ^(g >> 31)
 	}
+
 	return uint8(r), uint8(g), uint8(b)
 }

--- a/src/image/internal/imageutil/gen.go
+++ b/src/image/internal/imageutil/gen.go
@ -95,26 +95,42 @@ const sratioCase = `
 			%s

 				// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
-				yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
+				yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
 				cb1 := int32(src.Cb[ci]) - 128
 				cr1 := int32(src.Cr[ci]) - 128
-				r := (yy1 + 91881*cr1) >> 16
-				g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
-				b := (yy1 + 116130*cb1) >> 16
-				if r < 0 {
-					r = 0
-				} else if r > 255 {
-					r = 255
+
+				// The bit twiddling below is equivalent to
+				//
+				// r := (yy1 + 91881*cr1) >> 16
+				// if r < 0 {
+				//     r = 0
+				// } else if r > 0xff {
+				//     r = ^int32(0)
+				// }
+				//
+				// but uses fewer branches and is faster.
+				// Note that the uint8 type conversion in the return
+				// statement will convert ^int32(0) to 0xff.
+				// The code below to compute b and g uses a similar pattern.
+				r := yy1 + 91881*cr1
+				if uint32(r)&0xff000000 == 0 {
+					r >>= 16
+				} else {
+					r = ^(r >> 31)
 				}
-				if g < 0 {
-					g = 0
-				} else if g > 255 {
-					g = 255
+
+				b := yy1 + 116130*cb1
+				if uint32(b)&0xff000000 == 0 {
+					b >>= 16
+				} else {
+					b = ^(b >> 31)
 				}
-				if b < 0 {
-					b = 0
-				} else if b > 255 {
-					b = 255
+
+				g := yy1 - 22554*cb1 - 46802*cr1
+				if uint32(g)&0xff000000 == 0 {
+					g >>= 16
+				} else {
+					g = ^(g >> 31)
 				}

 				dpix[x+0] = uint8(r)
--- a/src/image/internal/imageutil/impl.go
+++ b/src/image/internal/imageutil/impl.go
@ -44,26 +44,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
 			for x := x0; x != x1; x, yi, ci = x+4, yi+1, ci+1 {

 				// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
-				yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
+				yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
 				cb1 := int32(src.Cb[ci]) - 128
 				cr1 := int32(src.Cr[ci]) - 128
-				r := (yy1 + 91881*cr1) >> 16
-				g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
-				b := (yy1 + 116130*cb1) >> 16
-				if r < 0 {
-					r = 0
-				} else if r > 255 {
-					r = 255
+
+				// The bit twiddling below is equivalent to
+				//
+				// r := (yy1 + 91881*cr1) >> 16
+				// if r < 0 {
+				//     r = 0
+				// } else if r > 0xff {
+				//     r = ^int32(0)
+				// }
+				//
+				// but uses fewer branches and is faster.
+				// Note that the uint8 type conversion in the return
+				// statement will convert ^int32(0) to 0xff.
+				// The code below to compute b and g uses a similar pattern.
+				r := yy1 + 91881*cr1
+				if uint32(r)&0xff000000 == 0 {
+					r >>= 16
+				} else {
+					r = ^(r >> 31)
 				}
-				if g < 0 {
-					g = 0
-				} else if g > 255 {
-					g = 255
+
+				b := yy1 + 116130*cb1
+				if uint32(b)&0xff000000 == 0 {
+					b >>= 16
+				} else {
+					b = ^(b >> 31)
 				}
-				if b < 0 {
-					b = 0
-				} else if b > 255 {
-					b = 255
+
+				g := yy1 - 22554*cb1 - 46802*cr1
+				if uint32(g)&0xff000000 == 0 {
+					g >>= 16
+				} else {
+					g = ^(g >> 31)
 				}

 				dpix[x+0] = uint8(r)
@ -83,26 +99,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
 				ci := ciBase + sx/2

 				// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
-				yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
+				yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
 				cb1 := int32(src.Cb[ci]) - 128
 				cr1 := int32(src.Cr[ci]) - 128
-				r := (yy1 + 91881*cr1) >> 16
-				g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
-				b := (yy1 + 116130*cb1) >> 16
-				if r < 0 {
-					r = 0
-				} else if r > 255 {
-					r = 255
+
+				// The bit twiddling below is equivalent to
+				//
+				// r := (yy1 + 91881*cr1) >> 16
+				// if r < 0 {
+				//     r = 0
+				// } else if r > 0xff {
+				//     r = ^int32(0)
+				// }
+				//
+				// but uses fewer branches and is faster.
+				// Note that the uint8 type conversion in the return
+				// statement will convert ^int32(0) to 0xff.
+				// The code below to compute b and g uses a similar pattern.
+				r := yy1 + 91881*cr1
+				if uint32(r)&0xff000000 == 0 {
+					r >>= 16
+				} else {
+					r = ^(r >> 31)
 				}
-				if g < 0 {
-					g = 0
-				} else if g > 255 {
-					g = 255
+
+				b := yy1 + 116130*cb1
+				if uint32(b)&0xff000000 == 0 {
+					b >>= 16
+				} else {
+					b = ^(b >> 31)
 				}
-				if b < 0 {
-					b = 0
-				} else if b > 255 {
-					b = 255
+
+				g := yy1 - 22554*cb1 - 46802*cr1
+				if uint32(g)&0xff000000 == 0 {
+					g >>= 16
+				} else {
+					g = ^(g >> 31)
 				}

 				dpix[x+0] = uint8(r)
@ -122,26 +154,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
 				ci := ciBase + sx/2

 				// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
-				yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
+				yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
 				cb1 := int32(src.Cb[ci]) - 128
 				cr1 := int32(src.Cr[ci]) - 128
-				r := (yy1 + 91881*cr1) >> 16
-				g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
-				b := (yy1 + 116130*cb1) >> 16
-				if r < 0 {
-					r = 0
-				} else if r > 255 {
-					r = 255
+
+				// The bit twiddling below is equivalent to
+				//
+				// r := (yy1 + 91881*cr1) >> 16
+				// if r < 0 {
+				//     r = 0
+				// } else if r > 0xff {
+				//     r = ^int32(0)
+				// }
+				//
+				// but uses fewer branches and is faster.
+				// Note that the uint8 type conversion in the return
+				// statement will convert ^int32(0) to 0xff.
+				// The code below to compute b and g uses a similar pattern.
+				r := yy1 + 91881*cr1
+				if uint32(r)&0xff000000 == 0 {
+					r >>= 16
+				} else {
+					r = ^(r >> 31)
 				}
-				if g < 0 {
-					g = 0
-				} else if g > 255 {
-					g = 255
+
+				b := yy1 + 116130*cb1
+				if uint32(b)&0xff000000 == 0 {
+					b >>= 16
+				} else {
+					b = ^(b >> 31)
 				}
-				if b < 0 {
-					b = 0
-				} else if b > 255 {
-					b = 255
+
+				g := yy1 - 22554*cb1 - 46802*cr1
+				if uint32(g)&0xff000000 == 0 {
+					g >>= 16
+				} else {
+					g = ^(g >> 31)
 				}

 				dpix[x+0] = uint8(r)
@ -160,26 +208,42 @@ func DrawYCbCr(dst *image.RGBA, r image.Rectangle, src *image.YCbCr, sp image.Po
 			for x := x0; x != x1; x, yi, ci = x+4, yi+1, ci+1 {

 				// This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
-				yy1 := int32(src.Y[yi]) * 0x10100 // Convert 0x12 to 0x121200.
+				yy1 := int32(src.Y[yi]) * 0x010100 // Convert 0x12 to 0x121200.
 				cb1 := int32(src.Cb[ci]) - 128
 				cr1 := int32(src.Cr[ci]) - 128
-				r := (yy1 + 91881*cr1) >> 16
-				g := (yy1 - 22554*cb1 - 46802*cr1) >> 16
-				b := (yy1 + 116130*cb1) >> 16
-				if r < 0 {
-					r = 0
-				} else if r > 255 {
-					r = 255
+
+				// The bit twiddling below is equivalent to
+				//
+				// r := (yy1 + 91881*cr1) >> 16
+				// if r < 0 {
+				//     r = 0
+				// } else if r > 0xff {
+				//     r = ^int32(0)
+				// }
+				//
+				// but uses fewer branches and is faster.
+				// Note that the uint8 type conversion in the return
+				// statement will convert ^int32(0) to 0xff.
+				// The code below to compute b and g uses a similar pattern.
+				r := yy1 + 91881*cr1
+				if uint32(r)&0xff000000 == 0 {
+					r >>= 16
+				} else {
+					r = ^(r >> 31)
 				}
-				if g < 0 {
-					g = 0
-				} else if g > 255 {
-					g = 255
+
+				b := yy1 + 116130*cb1
+				if uint32(b)&0xff000000 == 0 {
+					b >>= 16
+				} else {
+					b = ^(b >> 31)
 				}
-				if b < 0 {
-					b = 0
-				} else if b > 255 {
-					b = 255
+
+				g := yy1 - 22554*cb1 - 46802*cr1
+				if uint32(g)&0xff000000 == 0 {
+					g >>= 16
+				} else {
+					g = ^(g >> 31)
 				}

 				dpix[x+0] = uint8(r)