From f30719dc89c2a41502fa584b790943170ad2d1ce Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Wed, 28 Sep 2011 22:36:52 -0700 Subject: [PATCH] encoding/binary: support for varint encoding R=rsc, r, nigeltao, r, dsymonds CC=golang-dev https://golang.org/cl/5146048 --- src/pkg/encoding/binary/Makefile | 1 + src/pkg/encoding/binary/binary.go | 6 +- src/pkg/encoding/binary/varint.go | 163 +++++++++++++++++++++++ src/pkg/encoding/binary/varint_test.go | 175 +++++++++++++++++++++++++ 4 files changed, 342 insertions(+), 3 deletions(-) create mode 100644 src/pkg/encoding/binary/varint.go create mode 100644 src/pkg/encoding/binary/varint_test.go diff --git a/src/pkg/encoding/binary/Makefile b/src/pkg/encoding/binary/Makefile index dc46abe909d..3246f5a387a 100644 --- a/src/pkg/encoding/binary/Makefile +++ b/src/pkg/encoding/binary/Makefile @@ -7,5 +7,6 @@ include ../../../Make.inc TARG=encoding/binary GOFILES=\ binary.go\ + varint.go\ include ../../../Make.pkg diff --git a/src/pkg/encoding/binary/binary.go b/src/pkg/encoding/binary/binary.go index 8e55cb23b7c..c58f73694b8 100644 --- a/src/pkg/encoding/binary/binary.go +++ b/src/pkg/encoding/binary/binary.go @@ -17,9 +17,9 @@ import ( // A ByteOrder specifies how to convert byte sequences into // 16-, 32-, or 64-bit unsigned integers. type ByteOrder interface { - Uint16(b []byte) uint16 - Uint32(b []byte) uint32 - Uint64(b []byte) uint64 + Uint16([]byte) uint16 + Uint32([]byte) uint32 + Uint64([]byte) uint64 PutUint16([]byte, uint16) PutUint32([]byte, uint32) PutUint64([]byte, uint64) diff --git a/src/pkg/encoding/binary/varint.go b/src/pkg/encoding/binary/varint.go new file mode 100644 index 00000000000..1439dd3faae --- /dev/null +++ b/src/pkg/encoding/binary/varint.go @@ -0,0 +1,163 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package binary + +// This file implements "varint" encoding of 64-bit integers. +// The encoding is: +// - unsigned integers are serialized 7 bits at a time, starting with the +// least significant bits +// - the most significant bit (msb) in each output byte indicates if there +// is a continuation byte (msb = 1) +// - signed integers are mapped to unsigned integers using "zig-zag" +// encoding: Positive values x are written as 2*x + 0, negative values +// are written as 2*(^x) + 1; that is, negative numbers are complemented +// and whether to complement is encoded in bit 0. +// +// Design note: +// At most 10 bytes are needed for 64-bit values. The encoding could +// be more dense: a full 64-bit value needs an extra byte just to hold bit 63. +// Instead, the msb of the previous byte could be used to hold bit 63 since we +// know there can't be more than 64 bits. This is a trivial improvement and +// would reduce the maximum encoding length to 9 bytes. However, it breaks the +// invariant that the msb is always the "continuation bit" and thus makes the +// format incompatible with a varint encoding for larger numbers (say 128-bit). + +import ( + "io" + "os" +) + +// MaxVarintLenN is the maximum length of a varint-encoded N-bit integer. +const ( + MaxVarintLen16 = 3 + MaxVarintLen32 = 5 + MaxVarintLen64 = 10 +) + +// PutUvarint encodes a uint64 into buf and returns the number of bytes written. +// If the buffer is too small, the result is the negated number of bytes required +// (that is, -PutUvarint(nil, x) is the number of bytes required to encode x). +func PutUvarint(buf []byte, x uint64) int { + var i int + for i = range buf { + if x < 0x80 { + buf[i] = byte(x) + return i + 1 + } + buf[i] = byte(x) | 0x80 + x >>= 7 + } + // buffer too small; compute number of bytes required + for x >= 0x4000 { + x >>= 2 * 7 + i += 2 + } + if x >= 0x80 { + i++ + } + return -(i + 1) +} + +// Uvarint decodes a uint64 from buf and returns that value and the +// number of bytes read (> 0). If an error occurred, the value is 0 +// and the number of bytes n is <= 0 meaning: +// +// n == 0: buf too small +// n < 0: value larger than 64 bits (overflow) +// and -n is the number of bytes read +// +func Uvarint(buf []byte) (uint64, int) { + var x uint64 + var s uint + for i, b := range buf { + if b < 0x80 { + if i > 9 || i == 9 && b > 1 { + return 0, -(i + 1) // overflow + } + return x | uint64(b)< 0). If an error occurred, the value is 0 +// and the number of bytes n is <= 0 with the following meaning: +// +// n == 0: buf too small +// n < 0: value larger than 64 bits (overflow) +// and -n is the number of bytes read +// +func Varint(buf []byte) (int64, int) { + ux, n := Uvarint(buf) // ok to continue in presence of error + x := int64(ux >> 1) + if ux&1 != 0 { + x = ^x + } + return x, n +} + +// WriteUvarint encodes x and writes the result to w. +func WriteUvarint(w io.Writer, x uint64) os.Error { + var buf [MaxVarintLen64]byte + n := PutUvarint(buf[:], x) // won't fail + _, err := w.Write(buf[0:n]) + return err +} + +var overflow = os.NewError("binary: varint overflows a 64-bit integer") + +// ReadUvarint reads an encoded unsigned integer from r and returns it as a uint64. +func ReadUvarint(r io.ByteReader) (uint64, os.Error) { + var x uint64 + var s uint + for i := 0; ; i++ { + b, err := r.ReadByte() + if err != nil { + return x, err + } + if b < 0x80 { + if i > 9 || i == 9 && b > 1 { + return x, overflow + } + return x | uint64(b)<> 1) + if ux&1 != 0 { + x = ^x + } + return x, err +} diff --git a/src/pkg/encoding/binary/varint_test.go b/src/pkg/encoding/binary/varint_test.go new file mode 100644 index 00000000000..a85aceeeac6 --- /dev/null +++ b/src/pkg/encoding/binary/varint_test.go @@ -0,0 +1,175 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package binary + +import ( + "bytes" + "os" + "testing" +) + +func testConstant(t *testing.T, w uint, max int) { + n := -PutUvarint(nil, 1<