encoding/binary: add float support to fast path

This adds float type support to the main switch blocks in Read and
Write, instead of falling back to reflection. This gives a considerable
speedup for the float types.

It's not clear to me why other tests show other speedups and some
slowdowns.

name                        old time/op  new time/op  delta
ReadSlice1000Int32s-8       4.48µs ±15%  4.84µs ±15%     ~     (p=0.073 n=7+7)
ReadStruct-8                1.10µs ±12%  1.13µs ±13%     ~     (p=0.702 n=6+7)
ReadInts-8                   267ns ±14%   266ns ± 8%     ~     (p=1.000 n=6+7)
WriteInts-8                  322ns ±10%   306ns ± 6%     ~     (p=0.189 n=6+7)
WriteSlice1000Int32s-8      5.00µs ±30%  4.73µs ± 4%     ~     (p=0.927 n=7+7)
PutUint16-8                 0.69ns ±14%  0.64ns ±10%     ~     (p=0.091 n=7+7)
PutUint32-8                 0.69ns ±11%  0.66ns ±10%     ~     (p=0.273 n=7+7)
PutUint64-8                 0.75ns ± 6%  0.76ns ± 2%     ~     (p=0.223 n=7+6)
LittleEndianPutUint16-8     0.50ns ±14%  0.63ns ± 6%  +25.83%  (p=0.001 n=7+7)
LittleEndianPutUint32-8     0.62ns ± 7%  0.48ns ± 3%  -23.41%  (p=0.001 n=7+6)
LittleEndianPutUint64-8     0.49ns ± 5%  0.62ns ± 6%  +24.94%  (p=0.001 n=6+7)
ReadFloats-8                 129ns ± 9%    70ns ± 8%  -46.02%  (p=0.001 n=7+7)
WriteFloats-8                131ns ± 6%    86ns ±11%  -34.59%  (p=0.001 n=7+7)
ReadSlice1000Float32s-8     14.6µs ±14%   4.8µs ±12%  -67.29%  (p=0.001 n=7+7)
WriteSlice1000Float32s-8    16.4µs ±20%   4.7µs ± 8%  -71.01%  (p=0.001 n=7+7)
PutUvarint32-8              24.2ns ±10%  24.9ns ± 7%     ~     (p=0.271 n=7+7)
PutUvarint64-8              64.2ns ± 9%  64.1ns ±15%     ~     (p=0.738 n=7+7)
[Geo mean]                  57.1ns       47.1ns       -17.59%
2024-11-17 14:14:56 -07:00 · 2019-03-28 11:13:08 +00:00 · 2019-03-28 11:13:08 +00:00 · 4ff326e99c
commit 4ff326e99c
parent 843fec1c7d
2 changed files with 108 additions and 0 deletions
--- a/src/encoding/binary/binary.go
+++ b/src/encoding/binary/binary.go
@@ -184,6 +184,10 @@ func Read(r io.Reader, order ByteOrder, data interface{}) error {
 			*data = int64(order.Uint64(bs))
 		case *uint64:
 			*data = order.Uint64(bs)
+		case *float32:
+			*data = math.Float32frombits(order.Uint32(bs))
+		case *float64:
+			*data = math.Float64frombits(order.Uint64(bs))
 		case []bool:
 			for i, x := range bs { // Easier to loop over the input for 8-bit values.
 				data[i] = x != 0
@@ -218,6 +222,14 @@ func Read(r io.Reader, order ByteOrder, data interface{}) error {
 			for i := range data {
 				data[i] = order.Uint64(bs[8*i:])
 			}
+		case []float32:
+			for i := range data {
+				data[i] = math.Float32frombits(order.Uint32(bs[4*i:]))
+			}
+		case []float64:
+			for i := range data {
+				data[i] = math.Float64frombits(order.Uint64(bs[8*i:]))
+			}
 		}
 		return nil
 	}
@@ -338,6 +350,22 @@ func Write(w io.Writer, order ByteOrder, data interface{}) error {
 			for i, x := range v {
 				order.PutUint64(bs[8*i:], x)
 			}
+		case *float32:
+			order.PutUint32(bs, math.Float32bits(*v))
+		case float32:
+			order.PutUint32(bs, math.Float32bits(v))
+		case []float32:
+			for i, x := range v {
+				order.PutUint32(bs[4*i:], math.Float32bits(x))
+			}
+		case *float64:
+			order.PutUint64(bs, math.Float64bits(*v))
+		case float64:
+			order.PutUint64(bs, math.Float64bits(v))
+		case []float64:
+			for i, x := range v {
+				order.PutUint64(bs[8*i:], math.Float64bits(x))
+			}
 		}
 		_, err := w.Write(bs)
 		return err
@@ -677,6 +705,14 @@ func intDataSize(data interface{}) int {
 		return 8 * len(data)
 	case []uint64:
 		return 8 * len(data)
+	case float32, *float32:
+		return 4
+	case float64, *float64:
+		return 8
+	case []float32:
+		return 4 * len(data)
+	case []float64:
+		return 8 * len(data)
 	}
 	return 0
 }
--- a/src/encoding/binary/binary_test.go
+++ b/src/encoding/binary/binary_test.go
@@ -542,3 +542,75 @@ func BenchmarkLittleEndianPutUint64(b *testing.B) {
 		LittleEndian.PutUint64(putbuf[:], uint64(i))
 	}
 }
+
+func BenchmarkReadFloats(b *testing.B) {
+	var ls Struct
+	bsr := &byteSliceReader{}
+	var r io.Reader = bsr
+	b.SetBytes(4 + 8)
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		bsr.remain = big[30:]
+		Read(r, BigEndian, &ls.Float32)
+		Read(r, BigEndian, &ls.Float64)
+	}
+	b.StopTimer()
+	want := s
+	want.Int8 = 0
+	want.Int16 = 0
+	want.Int32 = 0
+	want.Int64 = 0
+	want.Uint8 = 0
+	want.Uint16 = 0
+	want.Uint32 = 0
+	want.Uint64 = 0
+	want.Complex64 = 0
+	want.Complex128 = 0
+	want.Array = [4]uint8{0, 0, 0, 0}
+	want.Bool = false
+	want.BoolArray = [4]bool{false, false, false, false}
+	if b.N > 0 && !reflect.DeepEqual(ls, want) {
+		b.Fatalf("struct doesn't match:\ngot  %v;\nwant %v", ls, want)
+	}
+}
+
+func BenchmarkWriteFloats(b *testing.B) {
+	buf := new(bytes.Buffer)
+	var w io.Writer = buf
+	b.SetBytes(4 + 8)
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		buf.Reset()
+		Write(w, BigEndian, s.Float32)
+		Write(w, BigEndian, s.Float64)
+	}
+	b.StopTimer()
+	if b.N > 0 && !bytes.Equal(buf.Bytes(), big[30:30+4+8]) {
+		b.Fatalf("first half doesn't match: %x %x", buf.Bytes(), big[30:30+4+8])
+	}
+}
+
+func BenchmarkReadSlice1000Float32s(b *testing.B) {
+	bsr := &byteSliceReader{}
+	slice := make([]float32, 1000)
+	buf := make([]byte, len(slice)*4)
+	b.SetBytes(int64(len(buf)))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		bsr.remain = buf
+		Read(bsr, BigEndian, slice)
+	}
+}
+
+func BenchmarkWriteSlice1000Float32s(b *testing.B) {
+	slice := make([]float32, 1000)
+	buf := new(bytes.Buffer)
+	var w io.Writer = buf
+	b.SetBytes(4 * 1000)
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		buf.Reset()
+		Write(w, BigEndian, slice)
+	}
+	b.StopTimer()
+}