2009-07-09 15:33:43 -06:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
2009-07-27 12:02:06 -06:00
|
|
|
/*
|
|
|
|
The gob package manages streams of gobs - binary values exchanged between an
|
|
|
|
Encoder (transmitter) and a Decoder (receiver). A typical use is transporting
|
|
|
|
arguments and results of remote procedure calls (RPCs) such as those provided by
|
|
|
|
package "rpc".
|
|
|
|
|
|
|
|
A stream of gobs is self-describing. Each data item in the stream is preceded by
|
|
|
|
a specification of its type, expressed in terms of a small set of predefined
|
|
|
|
types. Pointers are not transmitted, but the things they point to are
|
|
|
|
transmitted; that is, the values are flattened. Recursive types work fine, but
|
|
|
|
recursive values (data with cycles) are problematic. This may change.
|
|
|
|
|
|
|
|
To use gobs, create an Encoder and present it with a series of data items as
|
|
|
|
values or addresses that can be dereferenced to values. (At the moment, these
|
|
|
|
items must be structs (struct, *struct, **struct etc.), but this may change.) The
|
|
|
|
Encoder makes sure all type information is sent before it is needed. At the
|
|
|
|
receive side, a Decoder retrieves values from the encoded stream and unpacks them
|
|
|
|
into local variables.
|
|
|
|
|
|
|
|
The source and destination values/types need not correspond exactly. For structs,
|
|
|
|
fields (identified by name) that are in the source but absent from the receiving
|
|
|
|
variable will be ignored. Fields that are in the receiving variable but missing
|
|
|
|
from the transmitted type or value will be ignored in the destination. If a field
|
|
|
|
with the same name is present in both, their types must be compatible. Both the
|
|
|
|
receiver and transmitter will do all necessary indirection and dereferencing to
|
|
|
|
convert between gobs and actual Go values. For instance, a gob type that is
|
|
|
|
schematically,
|
|
|
|
|
|
|
|
struct { a, b int }
|
|
|
|
|
|
|
|
can be sent from or received into any of these Go types:
|
|
|
|
|
|
|
|
struct { a, b int } // the same
|
|
|
|
*struct { a, b int } // extra indirection of the struct
|
|
|
|
struct { *a, **b int } // extra indirection of the fields
|
|
|
|
struct { a, b int64 } // different concrete value type; see below
|
|
|
|
|
|
|
|
It may also be received into any of these:
|
|
|
|
|
|
|
|
struct { a, b int } // the same
|
|
|
|
struct { b, a int } // ordering doesn't matter; matching is by name
|
|
|
|
struct { a, b, c int } // extra field (c) ignored
|
|
|
|
struct { b int } // missing field (a) ignored; data will be dropped
|
|
|
|
struct { b, c int } // missing field (a) ignored; extra field (c) ignored.
|
|
|
|
|
|
|
|
Attempting to receive into these types will draw a decode error:
|
|
|
|
|
|
|
|
struct { a int; b uint } // change of signedness for b
|
|
|
|
struct { a int; b float } // change of type for b
|
2009-10-06 20:41:51 -06:00
|
|
|
struct { } // no field names in common
|
|
|
|
struct { c, d int } // no field names in common
|
2009-07-27 12:02:06 -06:00
|
|
|
|
|
|
|
Integers are transmitted two ways: arbitrary precision signed integers or
|
|
|
|
arbitrary precision unsigned integers. There is no int8, int16 etc.
|
|
|
|
discrimination in the gob format; there are only signed and unsigned integers. As
|
|
|
|
described below, the transmitter sends the value in a variable-length encoding;
|
|
|
|
the receiver accepts the value and stores it in the destination variable.
|
|
|
|
Floating-point numbers are always sent using IEEE-754 64-bit precision (see
|
|
|
|
below).
|
|
|
|
|
|
|
|
Signed integers may be received into any signed integer variable: int, int16, etc.;
|
|
|
|
unsigned integers may be received into any unsigned integer variable; and floating
|
|
|
|
point values may be received into any floating point variable. However,
|
|
|
|
the destination variable must be able to represent the value or the decode
|
2009-07-28 13:59:39 -06:00
|
|
|
operation will fail.
|
2009-07-27 12:02:06 -06:00
|
|
|
|
|
|
|
Structs, arrays and slices are also supported. Strings and arrays of bytes are
|
|
|
|
supported with a special, efficient representation (see below).
|
|
|
|
|
|
|
|
Maps are not supported yet, but they will be. Interfaces, functions, and channels
|
|
|
|
cannot be sent in a gob. Attempting to encode a value that contains one will
|
2009-07-29 18:24:25 -06:00
|
|
|
fail.
|
2009-07-27 12:02:06 -06:00
|
|
|
|
|
|
|
The rest of this comment documents the encoding, details that are not important
|
|
|
|
for most users. Details are presented bottom-up.
|
|
|
|
|
2009-07-28 18:20:19 -06:00
|
|
|
An unsigned integer is sent one of two ways. If it is less than 128, it is sent
|
|
|
|
as a byte with that value. Otherwise it is sent as a minimal-length big-endian
|
|
|
|
(high byte first) byte stream holding the value, preceded by one byte holding the
|
|
|
|
byte count, negated. Thus 0 is transmitted as (00), 7 is transmitted as (07) and
|
|
|
|
256 is transmitted as (FE 01 00).
|
2009-07-27 12:02:06 -06:00
|
|
|
|
|
|
|
A boolean is encoded within an unsigned integer: 0 for false, 1 for true.
|
|
|
|
|
|
|
|
A signed integer, i, is encoded within an unsigned integer, u. Within u, bits 1
|
|
|
|
upward contain the value; bit 0 says whether they should be complemented upon
|
|
|
|
receipt. The encode algorithm looks like this:
|
|
|
|
|
|
|
|
uint u;
|
|
|
|
if i < 0 {
|
|
|
|
u = (^i << 1) | 1 // complement i, bit 0 is 1
|
|
|
|
} else {
|
|
|
|
u = (i << 1) // do not complement i, bit 0 is 0
|
|
|
|
}
|
|
|
|
encodeUnsigned(u)
|
|
|
|
|
|
|
|
The low bit is therefore analogous to a sign bit, but making it the complement bit
|
|
|
|
instead guarantees that the largest negative integer is not a special case. For
|
|
|
|
example, -129=^128=(^256>>1) encodes as (FE 01 01).
|
|
|
|
|
|
|
|
Floating-point numbers are always sent as a representation of a float64 value.
|
|
|
|
That value is converted to a uint64 using math.Float64bits. The uint64 is then
|
|
|
|
byte-reversed and sent as a regular unsigned integer. The byte-reversal means the
|
|
|
|
exponent and high-precision part of the mantissa go first. Since the low bits are
|
|
|
|
often zero, this can save encoding bytes. For instance, 17.0 is encoded in only
|
|
|
|
three bytes (FE 31 40).
|
|
|
|
|
|
|
|
Strings and slices of bytes are sent as an unsigned count followed by that many
|
|
|
|
uninterpreted bytes of the value.
|
|
|
|
|
|
|
|
All other slices and arrays are sent as an unsigned count followed by that many
|
|
|
|
elements using the standard gob encoding for their type, recursively.
|
|
|
|
|
|
|
|
Structs are sent as a sequence of (field number, field value) pairs. The field
|
|
|
|
value is sent using the standard gob encoding for its type, recursively. If a
|
|
|
|
field has the zero value for its type, it is omitted from the transmission. The
|
|
|
|
field number is defined by the type of the encoded struct: the first field of the
|
|
|
|
encoded type is field 0, the second is field 1, etc. When encoding a value, the
|
|
|
|
field numbers are delta encoded for efficiency and the fields are always sent in
|
|
|
|
order of increasing field number; the deltas are therefore unsigned. The
|
|
|
|
initialization for the delta encoding sets the field number to -1, so an unsigned
|
|
|
|
integer field 0 with value 7 is transmitted as unsigned delta = 1, unsigned value
|
|
|
|
= 7 or (01 07). Finally, after all the fields have been sent a terminating mark
|
|
|
|
denotes the end of the struct. That mark is a delta=0 value, which has
|
|
|
|
representation (00).
|
|
|
|
|
|
|
|
The representation of types is described below. When a type is defined on a given
|
|
|
|
connection between an Encoder and Decoder, it is assigned a signed integer type
|
|
|
|
id. When Encoder.Encode(v) is called, it makes sure there is an id assigned for
|
|
|
|
the type of v and all its elements and then it sends the pair (typeid, encoded-v)
|
|
|
|
where typeid is the type id of the encoded type of v and encoded-v is the gob
|
|
|
|
encoding of the value v.
|
|
|
|
|
|
|
|
To define a type, the encoder chooses an unused, positive type id and sends the
|
|
|
|
pair (-type id, encoded-type) where encoded-type is the gob encoding of a wireType
|
|
|
|
description, constructed from these types:
|
|
|
|
|
|
|
|
type wireType struct {
|
|
|
|
s structType;
|
|
|
|
}
|
|
|
|
type fieldType struct {
|
|
|
|
name string; // the name of the field.
|
|
|
|
id int; // the type id of the field, which must be already defined
|
|
|
|
}
|
|
|
|
type commonType struct {
|
|
|
|
name string; // the name of the struct type
|
|
|
|
id int; // the id of the type, repeated so it's inside the type
|
|
|
|
}
|
|
|
|
type structType struct {
|
|
|
|
commonType;
|
|
|
|
field []fieldType; // the fields of the struct.
|
|
|
|
}
|
|
|
|
|
|
|
|
If there are nested type ids, the types for all inner type ids must be defined
|
|
|
|
before the top-level type id is used to describe an encoded-v.
|
|
|
|
|
|
|
|
For simplicity in setup, the connection is defined to understand these types a
|
|
|
|
priori, as well as the basic gob types int, uint, etc. Their ids are:
|
|
|
|
|
2009-10-06 20:41:51 -06:00
|
|
|
bool 1
|
|
|
|
int 2
|
|
|
|
uint 3
|
|
|
|
float 4
|
|
|
|
[]byte 5
|
|
|
|
string 6
|
2009-07-27 12:02:06 -06:00
|
|
|
wireType 7
|
|
|
|
structType 8
|
|
|
|
commonType 9
|
|
|
|
fieldType 10
|
|
|
|
|
|
|
|
In summary, a gob stream looks like
|
|
|
|
|
|
|
|
((-type id, encoding of a wireType)* (type id, encoding of a value))*
|
|
|
|
|
|
|
|
where * signifies zero or more repetitions and the type id of a value must
|
|
|
|
be predefined or be defined before the value in the stream.
|
|
|
|
*/
|
2009-07-09 15:33:43 -06:00
|
|
|
package gob
|
|
|
|
|
|
|
|
import (
|
2009-07-15 17:10:17 -06:00
|
|
|
"bytes";
|
2009-07-09 15:33:43 -06:00
|
|
|
"io";
|
|
|
|
"os";
|
|
|
|
"reflect";
|
|
|
|
"sync";
|
|
|
|
)
|
|
|
|
|
2009-07-27 12:02:06 -06:00
|
|
|
// An Encoder manages the transmission of type and data information to the
|
|
|
|
// other side of a connection.
|
2009-07-09 15:33:43 -06:00
|
|
|
type Encoder struct {
|
2009-10-06 20:41:51 -06:00
|
|
|
mutex sync.Mutex; // each item must be sent atomically
|
|
|
|
w io.Writer; // where to send the data
|
|
|
|
sent map[reflect.Type]typeId; // which types we've already sent
|
|
|
|
state *encoderState; // so we can encode integers, strings directly
|
|
|
|
countState *encoderState; // stage for writing counts
|
|
|
|
buf []byte; // for collecting the output.
|
2009-07-09 15:33:43 -06:00
|
|
|
}
|
|
|
|
|
2009-07-27 12:02:06 -06:00
|
|
|
// NewEncoder returns a new encoder that will transmit on the io.Writer.
|
2009-07-09 15:33:43 -06:00
|
|
|
func NewEncoder(w io.Writer) *Encoder {
|
|
|
|
enc := new(Encoder);
|
2009-07-15 17:10:17 -06:00
|
|
|
enc.w = w;
|
2009-10-06 20:41:51 -06:00
|
|
|
enc.sent = make(map[reflect.Type]typeId);
|
2009-07-15 17:10:17 -06:00
|
|
|
enc.state = new(encoderState);
|
|
|
|
enc.state.b = new(bytes.Buffer); // the rest isn't important; all we need is buffer and writer
|
|
|
|
enc.countState = new(encoderState);
|
|
|
|
enc.countState.b = new(bytes.Buffer); // the rest isn't important; all we need is buffer and writer
|
2009-07-09 15:33:43 -06:00
|
|
|
return enc;
|
|
|
|
}
|
|
|
|
|
|
|
|
func (enc *Encoder) badType(rt reflect.Type) {
|
2009-12-03 18:12:57 -07:00
|
|
|
enc.setError(os.ErrorString("gob: can't encode type " + rt.String()))
|
|
|
|
}
|
|
|
|
|
|
|
|
func (enc *Encoder) setError(err os.Error) {
|
|
|
|
if enc.state.err == nil { // remember the first.
|
|
|
|
enc.state.err = err
|
|
|
|
}
|
|
|
|
enc.state.b.Reset();
|
2009-07-15 17:10:17 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// Send the data item preceded by a unsigned count of its length.
|
|
|
|
func (enc *Encoder) send() {
|
|
|
|
// Encode the length.
|
|
|
|
encodeUint(enc.countState, uint64(enc.state.b.Len()));
|
|
|
|
// Build the buffer.
|
|
|
|
countLen := enc.countState.b.Len();
|
|
|
|
total := countLen + enc.state.b.Len();
|
|
|
|
if total > len(enc.buf) {
|
2009-11-09 13:07:39 -07:00
|
|
|
enc.buf = make([]byte, total+1000) // extra for growth
|
2009-07-15 17:10:17 -06:00
|
|
|
}
|
|
|
|
// Place the length before the data.
|
|
|
|
// TODO(r): avoid the extra copy here.
|
|
|
|
enc.countState.b.Read(enc.buf[0:countLen]);
|
|
|
|
// Now the data.
|
|
|
|
enc.state.b.Read(enc.buf[countLen:total]);
|
|
|
|
// Write the data.
|
2009-12-03 18:12:57 -07:00
|
|
|
_, err := enc.w.Write(enc.buf[0:total]);
|
|
|
|
if err != nil {
|
|
|
|
enc.setError(err)
|
|
|
|
}
|
2009-07-09 15:33:43 -06:00
|
|
|
}
|
|
|
|
|
2009-11-17 00:32:30 -07:00
|
|
|
func (enc *Encoder) sendType(origt reflect.Type) {
|
2009-07-09 15:33:43 -06:00
|
|
|
// Drill down to the base type.
|
2009-09-15 10:41:59 -06:00
|
|
|
rt, _ := indirect(origt);
|
2009-07-09 15:33:43 -06:00
|
|
|
|
|
|
|
// We only send structs - everything else is basic or an error
|
2009-11-17 00:32:30 -07:00
|
|
|
switch rt := rt.(type) {
|
2009-09-09 11:32:26 -06:00
|
|
|
default:
|
2009-11-17 00:32:30 -07:00
|
|
|
// Basic types do not need to be described.
|
|
|
|
return
|
2009-12-01 16:31:28 -07:00
|
|
|
case reflect.ArrayOrSliceType:
|
|
|
|
// If it's []uint8, don't send; it's considered basic.
|
|
|
|
if _, ok := rt.Elem().(*reflect.Uint8Type); ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
// Otherwise we do send.
|
|
|
|
break;
|
|
|
|
// Struct types are not sent, only their element types.
|
2009-09-09 11:32:26 -06:00
|
|
|
case *reflect.StructType:
|
2009-11-09 13:07:39 -07:00
|
|
|
break
|
2009-09-09 11:32:26 -06:00
|
|
|
case *reflect.ChanType, *reflect.FuncType, *reflect.MapType, *reflect.InterfaceType:
|
|
|
|
// Probably a bad field in a struct.
|
2009-08-30 20:46:35 -06:00
|
|
|
enc.badType(rt);
|
|
|
|
return;
|
2009-07-09 15:33:43 -06:00
|
|
|
}
|
|
|
|
|
2009-07-10 14:44:37 -06:00
|
|
|
// Have we already sent this type? This time we ask about the base type.
|
2009-09-15 10:41:59 -06:00
|
|
|
if _, alreadySent := enc.sent[rt]; alreadySent {
|
2009-11-09 13:07:39 -07:00
|
|
|
return
|
2009-07-09 15:33:43 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// Need to send it.
|
2009-07-16 18:55:16 -06:00
|
|
|
typeLock.Lock();
|
2009-07-29 18:24:25 -06:00
|
|
|
info, err := getTypeInfo(rt);
|
2009-07-16 18:55:16 -06:00
|
|
|
typeLock.Unlock();
|
2009-07-29 18:24:25 -06:00
|
|
|
if err != nil {
|
2009-12-03 18:12:57 -07:00
|
|
|
enc.setError(err);
|
2009-07-29 18:24:25 -06:00
|
|
|
return;
|
|
|
|
}
|
2009-07-09 15:33:43 -06:00
|
|
|
// Send the pair (-id, type)
|
|
|
|
// Id:
|
2009-07-27 12:02:06 -06:00
|
|
|
encodeInt(enc.state, -int64(info.id));
|
2009-07-09 15:33:43 -06:00
|
|
|
// Type:
|
2009-07-15 17:10:17 -06:00
|
|
|
encode(enc.state.b, info.wire);
|
|
|
|
enc.send();
|
2009-12-03 18:12:57 -07:00
|
|
|
if enc.state.err != nil {
|
|
|
|
return
|
|
|
|
}
|
2009-07-15 17:10:17 -06:00
|
|
|
|
2009-07-09 15:33:43 -06:00
|
|
|
// Remember we've sent this type.
|
2009-07-27 12:02:06 -06:00
|
|
|
enc.sent[rt] = info.id;
|
2009-07-10 14:44:37 -06:00
|
|
|
// Remember we've sent the top-level, possibly indirect type too.
|
2009-07-27 12:02:06 -06:00
|
|
|
enc.sent[origt] = info.id;
|
2009-07-09 15:33:43 -06:00
|
|
|
// Now send the inner types
|
2009-12-01 16:31:28 -07:00
|
|
|
switch st := rt.(type) {
|
|
|
|
case *reflect.StructType:
|
|
|
|
for i := 0; i < st.NumField(); i++ {
|
|
|
|
enc.sendType(st.Field(i).Type)
|
|
|
|
}
|
|
|
|
case reflect.ArrayOrSliceType:
|
|
|
|
enc.sendType(st.Elem())
|
2009-07-09 15:33:43 -06:00
|
|
|
}
|
2009-10-06 20:41:51 -06:00
|
|
|
return;
|
2009-07-09 15:33:43 -06:00
|
|
|
}
|
|
|
|
|
2009-07-27 12:02:06 -06:00
|
|
|
// Encode transmits the data item represented by the empty interface value,
|
|
|
|
// guaranteeing that all necessary type information has been transmitted first.
|
2009-07-09 15:33:43 -06:00
|
|
|
func (enc *Encoder) Encode(e interface{}) os.Error {
|
2009-12-03 18:12:57 -07:00
|
|
|
// Make sure we're single-threaded through here, so multiple
|
|
|
|
// goroutines can share an encoder.
|
|
|
|
enc.mutex.Lock();
|
|
|
|
defer enc.mutex.Unlock();
|
|
|
|
|
|
|
|
enc.state.err = nil;
|
2009-09-15 10:41:59 -06:00
|
|
|
rt, _ := indirect(reflect.Typeof(e));
|
2009-11-17 00:32:30 -07:00
|
|
|
// Must be a struct
|
|
|
|
if _, ok := rt.(*reflect.StructType); !ok {
|
|
|
|
enc.badType(rt);
|
|
|
|
return enc.state.err;
|
|
|
|
}
|
|
|
|
|
2009-12-03 18:12:57 -07:00
|
|
|
// Sanity check only: encoder should never come in with data present.
|
|
|
|
if enc.state.b.Len() > 0 || enc.countState.b.Len() > 0 {
|
|
|
|
enc.state.err = os.ErrorString("encoder: buffer not empty");
|
|
|
|
return enc.state.err;
|
|
|
|
}
|
2009-07-09 15:33:43 -06:00
|
|
|
|
|
|
|
// Make sure the type is known to the other side.
|
2009-07-10 14:44:37 -06:00
|
|
|
// First, have we already sent this type?
|
2009-09-15 10:41:59 -06:00
|
|
|
if _, alreadySent := enc.sent[rt]; !alreadySent {
|
2009-07-10 14:44:37 -06:00
|
|
|
// No, so send it.
|
2009-11-17 00:32:30 -07:00
|
|
|
enc.sendType(rt);
|
2009-07-10 14:44:37 -06:00
|
|
|
if enc.state.err != nil {
|
2009-07-15 17:10:17 -06:00
|
|
|
enc.countState.b.Reset();
|
2009-10-06 20:41:51 -06:00
|
|
|
return enc.state.err;
|
2009-07-10 14:44:37 -06:00
|
|
|
}
|
2009-07-09 15:33:43 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// Identify the type of this top-level value.
|
2009-07-15 17:10:17 -06:00
|
|
|
encodeInt(enc.state, int64(enc.sent[rt]));
|
2009-07-09 15:33:43 -06:00
|
|
|
|
2009-07-15 17:10:17 -06:00
|
|
|
// Encode the object.
|
|
|
|
encode(enc.state.b, e);
|
|
|
|
enc.send();
|
2009-07-09 15:33:43 -06:00
|
|
|
|
2009-10-06 20:41:51 -06:00
|
|
|
return enc.state.err;
|
2009-07-09 15:33:43 -06:00
|
|
|
}
|