1
0
mirror of https://github.com/golang/go synced 2024-11-23 00:20:12 -07:00
go/usr/gri/pretty/format.go
Russ Cox 917aa35f8f implications of stricter type equality:
if both types are named, they must be
the same type (arising from the same
declaration).

R=r,gri
DELTA=44  (21 added, 4 deleted, 19 changed)
OCL=28436
CL=28577
2009-05-08 15:40:14 -07:00

966 lines
20 KiB
Go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/* The format package implements syntax-directed formatting of arbitrary
data structures.
A format specification consists of a set of named productions in EBNF.
The production names correspond to the type names of the data structure
to be printed. The production expressions consist of literal values
(strings), references to fields, and alternative, grouped, optional,
and repetitive sub-expressions.
When printing a value, its type name is used to look up the production
to be printed. Literal values are printed as is, field references are
resolved and the respective field values are printed instead (using their
type-specific productions), and alternative, grouped, optional, and
repetitive sub-expressions are printed depending on whether they contain
"empty" fields or not. A field is empty if its value is nil.
*/
package format
import (
"flag";
"fmt";
"go/scanner";
"go/token";
"io";
"os";
"reflect";
"strconv";
"strings";
)
// TODO should probably do this in a different way
var (
debug = flag.Bool("d", false, "debug mode");
trace = flag.Bool("t", false, "trace mode");
)
// ----------------------------------------------------------------------------
// Format representation
type (
Formatter func(w io.Writer, value interface{}, name string) bool;
FormatterMap map[string]Formatter;
)
// A production expression is built from the following nodes.
//
type (
expr interface {
String() string;
};
alternative struct {
x, y expr;
};
sequence struct {
x, y expr;
};
literal struct {
// TODO should there be other types or should it all be string literals?
value []byte;
};
indentation struct {
iexpr expr; // outdent if nil
};
field struct {
fname string; // including "^", "*"
tname string; // "" if no tname specified
};
negation struct {
neg expr;
};
option struct {
opt expr;
};
repetition struct {
rep expr;
div expr;
};
custom struct {
name string;
form Formatter
};
)
func (x *alternative) String() string {
return fmt.Sprintf("(%v | %v)", x.x, x.y);
}
func (x *sequence) String() string {
return fmt.Sprintf("%v %v", x.x, x.y);
}
func (x *literal) String() string {
return strconv.Quote(string(x.value));
}
func (x *indentation) String() string {
if x.iexpr != nil {
fmt.Sprintf(">> %s", x.iexpr);
}
return "<<";
}
func (x *field) String() string {
if x.tname == "" {
return x.fname;
}
return x.fname + " : " + x.tname;
}
func (x *negation) String() string {
return fmt.Sprintf("!%v", x.neg);
}
func (x *option) String() string {
return fmt.Sprintf("[%v]", x.opt);
}
func (x *repetition) String() string {
if x.div == nil {
return fmt.Sprintf("{%v}", x.rep);
}
return fmt.Sprintf("{%v / %v}", x.rep, x.div);
}
func (x *custom) String() string {
return "<" + x.name + ">";
}
/* A Format is a set of production expressions. A new format is
created explicitly by calling Parse, or implicitly by one of
the Xprintf functions.
Formatting rules are specified in the following syntax:
Format = Production { ";" Production } [ ";" ] .
Production = Name "=" Expression .
Name = identifier { "." identifier } .
Expression = [ Term ] { "|" [ Term ] } .
Term = Factor { Factor } .
Factor = string_literal | Indent | Field | Negation | Group | Option | Repetition .
Indent = ">>" Factor | "<<" .
Field = ( "^" | "*" | Name ) [ ":" Name ] .
Negation = "!" Factor .
Group = "(" Expression ")" .
Option = "[" Expression "]" .
Repetition = "{" Expression "}" .
The syntax of white space, comments, identifiers, and string literals is
the same as in Go.
A production name corresponds to a Go type name of the form
PackageName.TypeName
(for instance format.Format). A production of the form
Name;
specifies a package name which is prepended to all subsequent production
names:
format;
Format = ... // this production matches the type format.Format
The basic operands of productions are string literals, field names, and
designators. String literals are printed as is, unless they contain a
single %-style format specifier (such as "%d"). In that case, they are
used as the format for fmt.Printf, with the current value as argument.
The designator "^" stands for the current value; a "*" denotes indirection
(pointers, arrays, maps, and interfaces).
A field may contain a format specifier of the form
: Expression
which specifies the field format irrespective of the field type.
Default formats are used for types without specific formating rules:
The "%v" format is used for values of all types expect pointer, array,
map, and interface types. They are using the "^" designator.
TODO complete this description
*/
type Format map [string] expr;
// ----------------------------------------------------------------------------
// Parsing
/* TODO
- have a format to select type name, field tag, field offset?
- use field tag as default format for that field
*/
type parser struct {
// scanning
scanner scanner.Scanner;
pos token.Position; // token position
tok token.Token; // one token look-ahead
lit []byte; // token literal
// error handling
errors io.ByteBuffer; // errors.Len() > 0 if there were errors
lastline int;
}
// The parser implements the scanner.ErrorHandler interface.
func (p *parser) Error(pos token.Position, msg string) {
if pos.Line != p.lastline {
// only report error if not on the same line as previous error
// in the hope to reduce number of follow-up errors reported
fmt.Fprintf(&p.errors, "%d:%d: %s\n", pos.Line, pos.Column, msg);
}
p.lastline = pos.Line;
}
func (p *parser) next() {
p.pos, p.tok, p.lit = p.scanner.Scan();
}
func (p *parser) error_expected(pos token.Position, msg string) {
msg = "expected " + msg;
if pos.Offset == p.pos.Offset {
// the error happened at the current position;
// make the error message more specific
msg += ", found '" + p.tok.String() + "'";
if p.tok.IsLiteral() {
msg += " " + string(p.lit);
}
}
p.Error(pos, msg);
}
func (p *parser) expect(tok token.Token) token.Position {
pos := p.pos;
if p.tok != tok {
p.error_expected(pos, "'" + tok.String() + "'");
}
p.next(); // make progress in any case
return pos;
}
func (p *parser) parseIdentifier() string {
name := string(p.lit);
p.expect(token.IDENT);
return name;
}
func (p *parser) parseName() string {
name := p.parseIdentifier();
for p.tok == token.PERIOD {
p.next();
name = name + "." + p.parseIdentifier();
}
return name;
}
func (p *parser) parseValue() []byte {
if p.tok != token.STRING {
p.expect(token.STRING);
return nil; // TODO should return something else?
}
// TODO get rid of back-and-forth conversions
// (change value to string?)
s, err := strconv.Unquote(string(p.lit));
if err != nil {
panic("scanner error?");
}
p.next();
return io.StringBytes(s);
}
func (p *parser) parseFactor() (x expr)
func (p *parser) parseExpr() expr
func (p *parser) parseField() expr {
var fname string;
switch p.tok {
case token.XOR:
fname = "^";
p.next();
case token.MUL:
fname = "*";
p.next();
case token.IDENT:
// TODO use reflect.ExpandType() to lookup a field
// during parse-time if posssible
fname = p.parseName();
default:
return nil;
}
var tname string;
if p.tok == token.COLON {
p.next();
tname = p.parseName();
}
return &field{fname, tname};
}
func (p *parser) parseFactor() (x expr) {
switch p.tok {
case token.STRING:
x = &literal{p.parseValue()};
case token.SHR:
p.next();
x = &indentation{p.parseFactor()};
case token.SHL:
p.next();
x = &indentation{nil};
case token.NOT:
p.next();
x = &negation{p.parseFactor()};
case token.LPAREN:
p.next();
x = p.parseExpr();
p.expect(token.RPAREN);
case token.LBRACK:
p.next();
x = &option{p.parseExpr()};
p.expect(token.RBRACK);
case token.LBRACE:
p.next();
x = p.parseExpr();
var div expr;
if p.tok == token.QUO {
p.next();
div = p.parseExpr();
}
x = &repetition{x, div};
p.expect(token.RBRACE);
default:
x = p.parseField();
}
return x;
}
func (p *parser) parseTerm() expr {
x := p.parseFactor();
if x != nil {
for {
y := p.parseFactor();
if y == nil {
break;
}
x = &sequence{x, y};
}
}
return x;
}
func (p *parser) parseExpr() expr {
x := p.parseTerm();
for p.tok == token.OR {
p.next();
y := p.parseTerm();
x = &alternative{x, y};
}
return x;
}
func (p *parser) parseProd() (string, expr) {
name := p.parseName();
p.expect(token.ASSIGN);
x := p.parseExpr();
return name, x;
}
func (p *parser) parseFormat() Format {
format := make(Format);
for p.tok != token.EOF {
pos := p.pos;
name, x := p.parseProd();
// add production to format
if t, found := format[name]; !found {
format[name] = x;
} else {
p.Error(pos, "production already declared: " + name);
}
if p.tok == token.SEMICOLON {
p.next();
} else {
break;
}
}
p.expect(token.EOF);
return format;
}
type formatError string
func (p formatError) String() string {
return string(p);
}
func readSource(src interface{}) ([]byte, os.Error) {
if src == nil {
return nil, formatError("src is nil");
}
switch s := src.(type) {
case string:
return io.StringBytes(s), nil;
case []byte:
if s == nil {
return nil, formatError("src is nil");
}
return s, nil;
case *io.ByteBuffer:
// is io.Read, but src is already available in []byte form
if s == nil {
return nil, formatError("src is nil");
}
return s.Data(), nil;
case io.Reader:
var buf io.ByteBuffer;
n, err := io.Copy(s, &buf);
if err != nil {
return nil, err;
}
return buf.Data(), nil
}
return nil, formatError("src type not supported");
}
// Parse parses a set of format productions. The format src may be
// a string, a []byte, or implement io.Read. The result is a Format
// if no errors occured; otherwise Parse returns nil.
//
func Parse(src interface{}, fmap FormatterMap) (f Format, err os.Error) {
s, err := readSource(src);
if err != nil {
return nil, err;
}
// parse format description
var p parser;
p.scanner.Init(s, &p, false);
p.next();
f = p.parseFormat();
// add custom formatters, if any
for name, form := range fmap {
if t, found := f[name]; !found {
f[name] = &custom{name, form};
} else {
fmt.Fprintf(&p.errors, "formatter already declared: %s", name);
}
}
if p.errors.Len() > 0 {
return nil, formatError(string(p.errors.Data()));
}
return f, nil;
}
func ParseOrDie(src interface{}, fmap FormatterMap) Format {
f, err := Parse(src, fmap);
if err != nil {
panic(err.String());
}
return f;
}
func (f Format) Dump() {
for name, form := range f {
fmt.Printf("%s = %v;\n", name, form);
}
}
// ----------------------------------------------------------------------------
// Formatting
func getField(v reflect.StructValue, fieldname string) reflect.Value {
t := v.Type().(reflect.StructType);
for i := 0; i < t.Len(); i++ {
name, typ, tag, offset := t.Field(i);
if name == fieldname {
return v.Field(i);
} else if name == "" {
// anonymous field - check type name
// TODO this is only going down one level - fix
if strings.HasSuffix(typ.Name(), "." + fieldname) {
return v.Field(i);
}
}
}
panicln(fmt.Sprintf("no field %s int %s", fieldname, t.Name()));
return nil;
}
func typename(value reflect.Value) string {
name := value.Type().Name();
if name != "" {
return name;
}
switch value.Kind() {
case reflect.ArrayKind: name = "array";
case reflect.BoolKind: name = "bool";
case reflect.ChanKind: name = "chan";
case reflect.DotDotDotKind: name = "ellipsis";
case reflect.FloatKind: name = "float";
case reflect.Float32Kind: name = "float32";
case reflect.Float64Kind: name = "float64";
case reflect.FuncKind: name = "func";
case reflect.IntKind: name = "int";
case reflect.Int16Kind: name = "int16";
case reflect.Int32Kind: name = "int32";
case reflect.Int64Kind: name = "int64";
case reflect.Int8Kind: name = "int8";
case reflect.InterfaceKind: name = "interface";
case reflect.MapKind: name = "map";
case reflect.PtrKind: name = "pointer";
case reflect.StringKind: name = "string";
case reflect.StructKind: name = "struct";
case reflect.UintKind: name = "uint";
case reflect.Uint16Kind: name = "uint16";
case reflect.Uint32Kind: name = "uint32";
case reflect.Uint64Kind: name = "uint64";
case reflect.Uint8Kind: name = "uint8";
case reflect.UintptrKind: name = "uintptr";
}
return name;
}
var defaults = map [int] expr {
reflect.ArrayKind: &field{"*", ""},
reflect.DotDotDotKind: &field{"*", ""},
reflect.InterfaceKind: &field{"*", ""},
reflect.MapKind: &field{"*", ""},
reflect.PtrKind: &field{"*", ""},
reflect.StringKind: &literal{io.StringBytes("%s")},
}
var catchAll = &literal{io.StringBytes("%v")};
func (f Format) getFormat(name string, value reflect.Value) expr {
/*
if name == "nil" {
fmt.Printf("value = %T %v, kind = %d\n", value, value, value.Kind());
panic();
}
*/
if fexpr, found := f[name]; found {
return fexpr;
}
if *debug {
fmt.Printf("no production for type: %s\n", name);
}
// no fexpr found - return kind-specific default value, if any
if fexpr, found := defaults[value.Kind()]; found {
return fexpr;
}
if *debug {
fmt.Printf("no default for type: %s\n", name);
}
return catchAll;
}
// Count the number of printf-style '%' formatters in s.
//
func percentCount(s []byte) int {
n := 0;
for i := 0; i < len(s); i++ {
if s[i] == '%' {
i++;
if i >= len(s) || s[i] != '%' { // don't count "%%"
n++;
}
}
}
return n;
}
func rawPrintf(w io.Writer, format []byte, value reflect.Value) {
// TODO find a better way to do this
x := value.Interface();
switch percentCount(format) {
case 0: w.Write(format);
case 1: fmt.Fprintf(w, string(format), x);
case 2: fmt.Fprintf(w, string(format), x, x);
case 3: fmt.Fprintf(w, string(format), x, x, x);
case 4: fmt.Fprintf(w, string(format), x, x, x, x);
default: panic("no support for more than 4 '%'-format chars yet");
}
}
// TODO this should become a Go built-in
func push(dst []int, x int) []int {
n := len(dst);
if n > cap(dst) {
panic("dst too small");
}
dst = dst[0 : n+1];
dst[n] = x;
return dst;
}
func append(dst, src []byte) []byte {
n, m := len(dst), len(src);
if n+m > cap(dst) {
panic("dst too small");
}
dst = dst[0 : n+m];
for i := 0; i < m; i++ {
dst[n+i] = src[i];
}
return dst;
}
type state struct {
f Format;
// indentation
indent_text []byte;
indent_widths []int;
}
func (ps *state) init(f Format) {
ps.f = f;
ps.indent_text = make([]byte, 0, 1000); // TODO don't use fixed cap
ps.indent_widths = make([]int, 0, 100); // TODO don't use fixed cap
}
func (ps *state) indent(text []byte) {
ps.indent_widths = push(ps.indent_widths, len(ps.indent_text));
ps.indent_text = append(ps.indent_text, text);
}
func (ps *state) outdent() {
i := len(ps.indent_widths);
if i > 0 {
ps.indent_text = ps.indent_text[0 : ps.indent_widths[i-1]];
ps.indent_widths = ps.indent_widths[0 : i-1];
}
}
func (ps *state) printIndented(w io.Writer, s []byte) {
// replace each '\n' with the indent + '\n'
i0 := 0;
for i := 0; i < len(s); i++ {
if s[i] == '\n' {
w.Write(s[i0 : i+1]);
w.Write(ps.indent_text);
i0 = i+1;
}
}
w.Write(s[i0 : len(s)]);
}
func (ps *state) printf(w io.Writer, format []byte, value reflect.Value) {
if len(ps.indent_widths) == 0 {
// no indentation
rawPrintf(w, format, value);
} else {
// print into temporary buffer
var buf io.ByteBuffer;
rawPrintf(&buf, format, value);
ps.printIndented(w, buf.Data());
}
}
func (ps *state) print(w io.Writer, fexpr expr, value reflect.Value, index, level int) bool
// Returns true if a non-empty field value was found.
func (ps *state) print0(w io.Writer, fexpr expr, value reflect.Value, index, level int) bool {
if fexpr == nil {
return true;
}
switch t := fexpr.(type) {
case *alternative:
// - print the contents of the first alternative with a non-empty field
// - result is true if there is at least one non-empty field
var buf io.ByteBuffer;
if ps.print(&buf, t.x, value, 0, level) {
w.Write(buf.Data());
return true;
} else {
var buf io.ByteBuffer;
if ps.print(&buf, t.y, value, 0, level) {
w.Write(buf.Data());
return true;
}
}
return false;
case *sequence:
// - print the contents of the sequence
// - result is true if there is no empty field
// TODO do we need to buffer here? why not?
b1 := ps.print(w, t.x, value, index, level);
b2 := ps.print(w, t.y, value, index, level);
return b1 && b2;
case *literal:
// - print the literal
// - result is always true (literal is never empty)
ps.printf(w, t.value, value);
return true;
case *indentation:
if t.iexpr != nil {
// indent
var buf io.ByteBuffer;
ps.print(&buf, t.iexpr, value, index, level);
ps.indent(buf.Data());
} else {
// outdent
ps.outdent();
}
return true;
case *field:
// - print the contents of the field
// - format is either the field format or the type-specific format
// - TODO look at field tag for default format
// - result is true if the field is not empty
switch t.fname {
case "^":
// identity - value doesn't change
case "*":
// indirect
switch v := value.(type) {
case reflect.ArrayValue:
if v.Len() <= index {
return false;
}
value = v.Elem(index);
case reflect.MapValue:
panic("reflection support for maps incomplete");
case reflect.PtrValue:
if v.Get() == nil {
return false;
}
value = v.Sub();
case reflect.InterfaceValue:
if v.Get() == nil {
return false;
}
value = v.Value();
default:
// TODO fix this
panic(fmt.Sprintf("error: * does not apply to `%s`\n", value.Type().Name()));
}
default:
// field
if s, is_struct := value.(reflect.StructValue); is_struct {
value = getField(s, t.fname);
} else {
// TODO fix this
panic(fmt.Sprintf("error: %s has no field `%s`\n", value.Type().Name(), t.fname));
}
}
// determine format
tname := t.tname;
if tname == "" {
tname = typename(value)
}
fexpr = ps.f.getFormat(tname, value);
return ps.print(w, fexpr, value, index, level);
case *negation:
// TODO is this operation useful at all?
// print the contents of the option if is contains an empty field
var buf io.ByteBuffer;
if !ps.print(&buf, t.neg, value, 0, level) {
w.Write(buf.Data());
}
return true;
case *option:
// print the contents of the option if it contains a non-empty field
var buf io.ByteBuffer;
if ps.print(&buf, t.opt, value, 0, level) {
w.Write(buf.Data());
}
return true;
case *repetition:
// print the contents of the repetition while there is a non-empty field
var buf io.ByteBuffer;
for i := 0; ps.print(&buf, t.rep, value, i, level); i++ {
if i > 0 {
ps.print(w, t.div, value, i, level);
}
w.Write(buf.Data());
buf.Reset();
}
return true;
case *custom:
var buf io.ByteBuffer;
if t.form(&buf, value.Interface(), t.name) {
ps.printIndented(w, buf.Data());
return true;
}
return false;
}
panic("unreachable");
return false;
}
func printTrace(indent int, format string, a ...) {
const dots =
". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ";
const n = len(dots);
i := 2*indent;
for ; i > n; i -= n {
fmt.Print(dots);
}
fmt.Print(dots[0 : i]);
fmt.Printf(format, a);
}
func (ps *state) print(w io.Writer, fexpr expr, value reflect.Value, index, level int) bool {
if *trace {
printTrace(level, "%v, %d {\n", fexpr, /*value.Interface(), */index);
}
result := ps.print0(w, fexpr, value, index, level+1);
if *trace {
printTrace(level, "} %v\n", result);
}
return result;
}
// TODO proper error reporting
// Fprint formats each argument according to the format f
// and writes to w.
//
func (f Format) Fprint(w io.Writer, args ...) {
value := reflect.NewValue(args).(reflect.StructValue);
for i := 0; i < value.Len(); i++ {
fld := value.Field(i);
var ps state;
ps.init(f);
ps.print(w, f.getFormat(typename(fld), fld), fld, 0, 0);
}
}
// Print formats each argument according to the format f
// and writes to standard output.
//
func (f Format) Print(args ...) {
f.Fprint(os.Stdout, args);
}
// Sprint formats each argument according to the format f
// and returns the resulting string.
//
func (f Format) Sprint(args ...) string {
var buf io.ByteBuffer;
f.Fprint(&buf, args);
return string(buf.Data());
}