go/usr/gri/pretty/format.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

/*	The format package implements syntax-directed formatting of arbitrary
	data structures.

	A format specification consists of a set of named productions in EBNF.
	The production names correspond to the type names of the data structure
	to be printed. The production expressions consist of literal values
	(strings), references to fields, and alternative, grouped, optional,
	and repetitive sub-expressions.

	When printing a value, its type name is used to look up the production
	to be printed. Literal values are printed as is, field references are
	resolved and the respective field values are printed instead (using their
	type-specific productions), and alternative, grouped, optional, and
	repetitive sub-expressions are printed depending on whether they contain
	"empty" fields or not. A field is empty if its value is nil.
*/
package format

import (
	"flag";
	"fmt";
	"go/scanner";
	"go/token";
	"io";
	"os";
	"reflect";
	"strconv";
	"strings";
)


// TODO should probably do this in a different way
//
// Command-line flags controlling diagnostic output:
//	-d  getFormat reports type names for which no production
//	    (and no kind-specific default) was found
//	-t  print emits a trace line before and after every
//	    expression it evaluates
var (
	debug = flag.Bool("d", false, "debug mode");
	trace = flag.Bool("t", false, "trace mode");
)


// ----------------------------------------------------------------------------
// Format representation

// A Formatter is a user-supplied formatting function. When a production
// bound to a Formatter is printed, the Formatter is called with a writer
// for its output, the value to be formatted (as obtained from
// reflect.Value.Interface), and the name under which it was registered.
// If it returns false, its output is discarded and the value is treated
// as empty.
//
// A FormatterMap maps production names to Formatters; see Parse.
type (
	Formatter func(w io.Writer, value interface{}, name string) bool;
	FormatterMap map[string]Formatter;
)


// A production expression is built from the following nodes.
// Each node type implements expr; String returns the node in
// a notation close to the format syntax.
//
type (
	// expr is the interface satisfied by all expression nodes.
	expr interface {
		String() string;
	};

	// alternative is "x | y": the first of x, y that produces
	// a non-empty result is printed.
	alternative struct {
		x, y expr;
	};

	// sequence is "x y": x is printed, followed by y.
	sequence struct {
		x, y expr;
	};

	// literal is a string literal; it is printed via printf-style
	// formatting with the current value as argument.
	literal struct {
		// TODO should there be other types or should it all be string literals?
		value []byte;
	};

	// indentation is ">> iexpr" (indent by the output of iexpr)
	// or "<<" (undo the most recent indent).
	indentation struct {
		iexpr expr;  // outdent if nil
	};

	// field is a reference to the current value ("^"), to the value
	// it refers to ("*"), or to a named struct field, optionally
	// followed by the name of the production used to format it.
	field struct {
		fname string;  // including "^", "*"
		tname string;  // "" if no tname specified
	};

	// negation is "!neg": the output of neg is printed only if
	// neg reports an empty result.
	negation struct {
		neg expr;
	};

	// option is "[opt]": the output of opt is printed only if
	// opt reports a non-empty result.
	option struct {
		opt expr;
	};

	// repetition is "{rep}" or "{rep / div}": rep is printed with
	// increasing index for as long as it reports a non-empty result;
	// div, if present, is printed between consecutive repetitions.
	repetition struct {
		rep expr;
		div expr;
	};

	// custom binds a production name to a user-supplied Formatter.
	custom struct {
		name string;
		form Formatter
	};
)


// String returns the alternative in the form "(x | y)".
func (a *alternative) String() string {
	left, right := a.x, a.y;
	return fmt.Sprintf("(%v | %v)", left, right);
}


// String returns the two operands of the sequence separated by a blank.
func (s *sequence) String() string {
	first, second := s.x, s.y;
	return fmt.Sprintf("%v %v", first, second);
}


// String returns the literal value as a quoted Go string.
func (lit *literal) String() string {
	text := string(lit.value);
	return strconv.Quote(text);
}


// String returns ">> expr" for an indent node and "<<" for an
// outdent node (an indentation whose iexpr is nil).
func (x *indentation) String() string {
	if x.iexpr != nil {
		// The result must be returned; previously it was computed
		// and dropped, so every indentation was rendered as "<<".
		return fmt.Sprintf(">> %s", x.iexpr);
	}
	return "<<";
}


// String returns the field name, followed by " : " and the
// production name if one was specified.
func (f *field) String() string {
	if f.tname != "" {
		return f.fname + " : " + f.tname;
	}
	return f.fname;
}


// String returns the negated expression prefixed with "!".
func (n *negation) String() string {
	operand := n.neg;
	return fmt.Sprintf("!%v", operand);
}


// String returns the optional expression enclosed in brackets.
func (o *option) String() string {
	body := o.opt;
	return fmt.Sprintf("[%v]", body);
}


// String returns the repetition enclosed in braces; the divider,
// if any, follows the repeated expression after a " / ".
func (r *repetition) String() string {
	if r.div != nil {
		return fmt.Sprintf("{%v / %v}", r.rep, r.div);
	}
	return fmt.Sprintf("{%v}", r.rep);
}


// String returns the name of the custom formatter in angle brackets.
func (c *custom) String() string {
	s := "<";
	s += c.name;
	s += ">";
	return s;
}


/*	A Format is a set of production expressions. A new format is
	created explicitly by calling Parse, or implicitly by one of
	the Xprintf functions.

	Formatting rules are specified in the following syntax:

		Format      = Production { ";" Production } [ ";" ] .
		Production  = Name "=" Expression .
		Name        = identifier { "." identifier } .
		Expression  = [ Term ] { "|" [ Term ] } .
		Term        = Factor { Factor } .
		Factor      = string_literal | Indent | Field | Negation | Group | Option | Repetition .
		Indent      = ">>" Factor | "<<" .
		Field       = ( "^" | "*" | Name ) [ ":" Name ] .
		Negation    = "!" Factor .
		Group       = "(" Expression ")" .
		Option      = "[" Expression "]" .
		Repetition  = "{" Expression [ "/" Expression ] "}" .

	The syntax of white space, comments, identifiers, and string literals is
	the same as in Go.

	A production name corresponds to a Go type name of the form

		PackageName.TypeName

	(for instance format.Format). A production of the form

		Name;

	specifies a package name which is prepended to all subsequent production
	names:

		format;
		Format = ...	// this production matches the type format.Format

	NOTE(review): the parser in this file expects "=" after every production
	name; package-name productions do not appear to be implemented yet.

	The basic operands of productions are string literals, field names, and
	designators. String literals are printed as is, unless they contain
	%-style format specifiers (such as "%d"; at most four are supported).
	In that case, they are used as the format for fmt.Fprintf, with the
	current value as the argument for each specifier.

	The designator "^" stands for the current value; a "*" denotes indirection
	(pointers, arrays, maps, and interfaces).

	In a repetition, the optional expression following the "/" is printed
	between consecutive repetitions.

	A field may be followed by a format specifier of the form

		: Name

	which names the production used to format the field, irrespective of
	the field type.

	Default formats are used for types without specific formatting rules:
	The "%v" format is used for values of all types except pointer, array,
	map, and interface types, which use the "*" designator, and strings,
	which use the "%s" format.

	TODO complete this description
*/
type Format map [string] expr;


// ----------------------------------------------------------------------------
// Parsing

/*	TODO
	- have a format to select type name, field tag, field offset?
	- use field tag as default format for that field
*/

// parser holds the scanning and error state used while parsing
// a format specification. The parser works with a single token
// of look-ahead (pos, tok, lit), which next refills.
type parser struct {
	// scanning
	scanner scanner.Scanner;
	pos token.Position;  // token position
	tok token.Token;  // one token look-ahead
	lit []byte;  // token literal

	// error handling
	errors io.ByteBuffer;  // errors.Len() > 0 if there were errors
	lastline int;  // line of the most recent call to Error
}


// Error makes the parser a scanner.ErrorHandler. The message is
// appended to p.errors as "line:column: msg", one message per line.
// To cut down on follow-up errors, a message is dropped if it is for
// the same line as the previous call to Error.
func (p *parser) Error(pos token.Position, msg string) {
	line := pos.Line;
	if line != p.lastline {
		fmt.Fprintf(&p.errors, "%d:%d: %s\n", line, pos.Column, msg);
	}
	// remember the line even if the message was suppressed
	p.lastline = line;
}


// next advances to the next token: it scans one token and stores
// its position, kind, and literal as the new look-ahead.
func (p *parser) next() {
	pos, tok, lit := p.scanner.Scan();
	p.pos = pos;
	p.tok = tok;
	p.lit = lit;
}


// error_expected reports that msg was expected at pos. If pos is the
// position of the current token, the message also says which token
// (and, for literal tokens, which literal) was found instead.
func (p *parser) error_expected(pos token.Position, msg string) {
	text := "expected " + msg;
	at_current := pos.Offset == p.pos.Offset;
	if at_current {
		// the error is at the look-ahead token: be more specific
		text += ", found '" + p.tok.String() + "'";
	}
	if at_current && p.tok.IsLiteral() {
		text += " " + string(p.lit);
	}
	p.Error(pos, text);
}


// expect checks that the current token is tok and reports an error
// if it is not. The current token is consumed in either case so that
// the parser always makes progress. The result is the position of
// the consumed token.
func (p *parser) expect(tok token.Token) token.Position {
	start := p.pos;
	matched := p.tok == tok;
	if !matched {
		p.error_expected(start, "'" + tok.String() + "'");
	}
	p.next();
	return start;
}


// parseIdentifier consumes the current token, which is expected to be
// an identifier, and returns its literal text. If the token is not an
// identifier, an error is reported and the literal of the offending
// token is returned.
func (p *parser) parseIdentifier() string {
	// grab the literal first; expect replaces the look-ahead token
	ident := string(p.lit);
	p.expect(token.IDENT);
	return ident;
}


// parseName parses
//
//	Name = identifier { "." identifier } .
//
// and returns the identifiers joined by periods.
func (p *parser) parseName() string {
	qualified := p.parseIdentifier();
	for {
		if p.tok != token.PERIOD {
			break;
		}
		p.next();  // consume "."
		qualified += "." + p.parseIdentifier();
	}
	return qualified;
}


// parseValue parses a string literal and returns its unquoted value.
// If the current token is not a string, an error is reported, the
// token is consumed, and the result is nil.
func (p *parser) parseValue() []byte {
	if p.tok == token.STRING {
		// TODO get rid of back-and-forth conversions
		//      (change value to string?)
		unquoted, err := strconv.Unquote(string(p.lit));
		if err != nil {
			// the scanner should only hand out well-formed strings
			panic("scanner error?");
		}
		p.next();
		return io.StringBytes(unquoted);
	}

	p.expect(token.STRING);
	return nil;  // TODO should return something else?
}


// Forward declarations: parseFactor and parseExpr are mutually
// recursive (parseExpr calls parseTerm, which calls parseFactor,
// which in turn calls parseExpr for groups, options, and repetitions).
func (p *parser) parseFactor() (x expr)
func (p *parser) parseExpr() expr

// parseField parses
//
//	Field = ( "^" | "*" | Name ) [ ":" Name ] .
//
// It returns nil, without consuming anything, if the current
// token cannot start a field.
func (p *parser) parseField() expr {
	var fname string;
	if p.tok == token.XOR {
		fname = "^";
		p.next();
	} else if p.tok == token.MUL {
		fname = "*";
		p.next();
	} else if p.tok == token.IDENT {
		// TODO use reflect.ExpandType() to lookup a field
		// during parse-time if possible
		fname = p.parseName();
	} else {
		return nil;
	}

	// optional ": Name" selects the production used for the field
	tname := "";
	if p.tok == token.COLON {
		p.next();
		tname = p.parseName();
	}

	return &field{fname, tname};
}


// parseFactor parses
//
//	Factor     = string_literal | Indent | Field | Negation | Group | Option | Repetition .
//	Indent     = ">>" Factor | "<<" .
//	Negation   = "!" Factor .
//	Group      = "(" Expression ")" .
//	Option     = "[" Expression "]" .
//	Repetition = "{" Expression [ "/" Expression ] "}" .
//
// The result is nil if the current token cannot start a factor (in
// which case no token is consumed); parseTerm relies on this to find
// the end of a term.
func (p *parser) parseFactor() (x expr) {
	switch p.tok {
	case token.STRING:
		x = &literal{p.parseValue()};

	case token.SHR:
		p.next();
		pos := p.pos;
		operand := p.parseFactor();
		if operand == nil {
			// An indentation node without operand means "outdent".
			// Without this check, a ">>" that is not followed by a
			// factor would silently behave like "<<".
			p.error_expected(pos, "factor");
		}
		x = &indentation{operand};

	case token.SHL:
		p.next();
		x = &indentation{nil};

	case token.NOT:
		p.next();
		x = &negation{p.parseFactor()};

	case token.LPAREN:
		p.next();
		x = p.parseExpr();
		p.expect(token.RPAREN);

	case token.LBRACK:
		p.next();
		x = &option{p.parseExpr()};
		p.expect(token.RBRACK);

	case token.LBRACE:
		p.next();
		x = p.parseExpr();
		// optional divider, printed between repetitions
		var div expr;
		if p.tok == token.QUO {
			p.next();
			div = p.parseExpr();
		}
		x = &repetition{x, div};
		p.expect(token.RBRACE);

	default:
		// a field, or nil if there is no factor here
		x = p.parseField();
	}

	return x;
}


// parseTerm parses
//
//	Term = Factor { Factor } .
//
// Consecutive factors are combined into left-leaning sequence nodes.
// The result is nil if there is no factor at all.
func (p *parser) parseTerm() expr {
	term := p.parseFactor();
	if term == nil {
		return nil;
	}

	for next := p.parseFactor(); next != nil; next = p.parseFactor() {
		term = &sequence{term, next};
	}
	return term;
}


// parseExpr parses
//
//	Expression = [ Term ] { "|" [ Term ] } .
//
// Alternatives are combined into left-leaning alternative nodes;
// a missing term is represented by a nil operand.
func (p *parser) parseExpr() expr {
	result := p.parseTerm();
	for {
		if p.tok != token.OR {
			break;
		}
		p.next();  // consume "|"
		rhs := p.parseTerm();
		result = &alternative{result, rhs};
	}
	return result;
}


// parseProd parses
//
//	Production = Name "=" Expression .
//
// and returns the production name and its expression.
func (p *parser) parseProd() (string, expr) {
	name := p.parseName();
	p.expect(token.ASSIGN);
	return name, p.parseExpr();
}


// parseFormat parses
//
//	Format = Production { ";" Production } [ ";" ] .
//
// and returns the productions found. A production whose name was
// already used is reported as an error and ignored.
func (p *parser) parseFormat() Format {
	format := make(Format);

	for p.tok != token.EOF {
		pos := p.pos;
		name, x := p.parseProd();

		// the first production with a given name wins
		if t, found := format[name]; found {
			p.Error(pos, "production already declared: " + name);
		} else {
			format[name] = x;
		}

		// productions are separated by semicolons
		if p.tok != token.SEMICOLON {
			break;
		}
		p.next();
	}
	p.expect(token.EOF);

	return format;
}


// A formatError is an error whose message is the string itself.
type formatError string

// String returns the error message.
func (e formatError) String() string {
	msg := string(e);
	return msg;
}


// readSource returns the bytes of src, which may be a string, a
// []byte, an *io.ByteBuffer, or an io.Reader (which is read until
// it is exhausted). A nil src and any other type are errors.
func readSource(src interface{}) ([]byte, os.Error) {
	if src == nil {
		return nil, formatError("src is nil");
	}

	switch s := src.(type) {
	case string:
		return io.StringBytes(s), nil;

	case []byte:
		if s != nil {
			return s, nil;
		}
		return nil, formatError("src is nil");

	case *io.ByteBuffer:
		// is io.Read, but src is already available in []byte form
		if s != nil {
			return s.Data(), nil;
		}
		return nil, formatError("src is nil");

	case io.Reader:
		// collect everything the reader provides
		var buf io.ByteBuffer;
		n, err := io.Copy(s, &buf);
		if err == nil {
			return buf.Data(), nil;
		}
		return nil, err
	}

	return nil, formatError("src type not supported");
}


// Parse parses a set of format productions. The format src may be
// a string, a []byte, an *io.ByteBuffer, or implement io.Reader.
// Each entry of fmap is added to the result as a production bound
// to the respective custom Formatter; it is an error if src already
// declares a production of that name.
//
// The result is a Format if no errors occurred; otherwise Parse
// returns nil and an error whose message lists the problems found,
// one per line.
//
func Parse(src interface{}, fmap FormatterMap) (f Format, err os.Error) {
	s, err := readSource(src);
	if err != nil {
		return nil, err;
	}

	// parse format description
	var p parser;
	p.scanner.Init(s, &p, false);
	p.next();
	f = p.parseFormat();

	// add custom formatters, if any
	for name, form := range fmap {
		if t, found := f[name]; !found {
			f[name] = &custom{name, form};
		} else {
			// Terminate the message with a newline, as parser.Error
			// does; otherwise several messages run together.
			fmt.Fprintf(&p.errors, "formatter already declared: %s\n", name);
		}
	}

	if p.errors.Len() > 0 {
		return nil, formatError(string(p.errors.Data()));
	}

	return f, nil;
}


// ParseOrDie is like Parse but panics with the error message
// if the format cannot be parsed.
func ParseOrDie(src interface{}, fmap FormatterMap) Format {
	format, failure := Parse(src, fmap);
	if failure != nil {
		panic(failure.String());
	}
	return format;
}


// Dump writes all productions of f to standard output, one per
// line, in the form "name = expression;".
func (f Format) Dump() {
	for prod, x := range f {
		fmt.Printf("%s = %v;\n", prod, x);
	}
}


// ----------------------------------------------------------------------------
// Formatting

// getField returns the field of struct v with the given name. An
// anonymous field matches if the name of its type ends in
// "." + fieldname. getField panics if there is no matching field.
func getField(v reflect.StructValue, fieldname string) reflect.Value {
	t := v.Type().(reflect.StructType);
	for i := 0; i < t.Len(); i++ {
		name, typ, tag, offset := t.Field(i);
		if name == fieldname {
			return v.Field(i);
		} else if name == "" {
			// anonymous field - check type name
			// TODO this is only going down one level - fix
			if strings.HasSuffix(typ.Name(), "." + fieldname) {
				return v.Field(i);
			}
		}
	}
	// (the message used to read "no field x int T")
	panicln(fmt.Sprintf("no field %s in %s", fieldname, t.Name()));
	return nil;
}


// typename returns the name under which the production for value is
// looked up: the name of the value's type if it has one, otherwise a
// generic name derived from the value's kind ("" for unknown kinds).
func typename(value reflect.Value) string {
	if name := value.Type().Name(); name != "" {
		return name;
	}

	// unnamed type: fall back to the kind
	switch value.Kind() {
	case reflect.ArrayKind: return "array";
	case reflect.BoolKind: return "bool";
	case reflect.ChanKind: return "chan";
	case reflect.DotDotDotKind: return "ellipsis";
	case reflect.FloatKind: return "float";
	case reflect.Float32Kind: return "float32";
	case reflect.Float64Kind: return "float64";
	case reflect.FuncKind: return "func";
	case reflect.IntKind: return "int";
	case reflect.Int16Kind: return "int16";
	case reflect.Int32Kind: return "int32";
	case reflect.Int64Kind: return "int64";
	case reflect.Int8Kind: return "int8";
	case reflect.InterfaceKind: return "interface";
	case reflect.MapKind: return "map";
	case reflect.PtrKind: return "pointer";
	case reflect.StringKind: return "string";
	case reflect.StructKind: return "struct";
	case reflect.UintKind: return "uint";
	case reflect.Uint16Kind: return "uint16";
	case reflect.Uint32Kind: return "uint32";
	case reflect.Uint64Kind: return "uint64";
	case reflect.Uint8Kind: return "uint8";
	case reflect.UintptrKind: return "uintptr";
	}

	return "";
}


// defaults maps a reflect kind to the expression used for values of
// that kind when the format has no production for the value's type
// name: arrays, ellipsis values, interfaces, maps, and pointers are
// formatted via the "*" designator, strings via "%s".
var defaults = map [int] expr {
	reflect.ArrayKind: &field{"*", ""},
	reflect.DotDotDotKind: &field{"*", ""},
	reflect.InterfaceKind: &field{"*", ""},
	reflect.MapKind: &field{"*", ""},
	reflect.PtrKind: &field{"*", ""},
	reflect.StringKind: &literal{io.StringBytes("%s")},
}

// catchAll is the expression used when there is neither a production
// nor a kind-specific default: the value is printed with "%v".
var catchAll = &literal{io.StringBytes("%v")};

// getFormat returns the expression used to format value. In order of
// preference this is the production called name, the default for the
// kind of value, or the catch-all "%v" literal. In debug mode, each
// failed lookup is reported on standard output.
func (f Format) getFormat(name string, value reflect.Value) expr {
	/*
	if name == "nil" {
		fmt.Printf("value = %T %v, kind = %d\n", value, value, value.Kind());
		panic();
	}
	*/

	// explicit production
	fexpr, found := f[name];
	if found {
		return fexpr;
	}
	if *debug {
		fmt.Printf("no production for type: %s\n", name);
	}

	// kind-specific default, if any
	fexpr, found = defaults[value.Kind()];
	if found {
		return fexpr;
	}
	if *debug {
		fmt.Printf("no default for type: %s\n", name);
	}

	return catchAll;
}


// percentCount returns the number of printf-style '%' formatters in s.
// A '%' immediately followed by another '%' is an escaped percent sign
// and is not counted; a '%' at the very end of s is counted.
//
func percentCount(s []byte) int {
	count := 0;
	for pos := 0; pos < len(s); pos++ {
		if s[pos] != '%' {
			continue;
		}
		// look at (and skip) the byte following the '%'
		pos++;
		if pos < len(s) && s[pos] == '%' {
			continue;  // don't count "%%"
		}
		count++;
	}
	return count;
}


// rawPrintf writes format to w. If format contains '%' formatters,
// it is used as an fmt.Fprintf format string, and value is passed
// as the argument for each formatter. At most 4 formatters are
// supported; rawPrintf panics if there are more.
func rawPrintf(w io.Writer, format []byte, value reflect.Value) {
	// TODO find a better way to do this
	arg := value.Interface();
	count := percentCount(format);
	if count == 0 {
		// plain text: no formatting required
		w.Write(format);
		return;
	}

	layout := string(format);
	switch count {
	case  1: fmt.Fprintf(w, layout, arg);
	case  2: fmt.Fprintf(w, layout, arg, arg);
	case  3: fmt.Fprintf(w, layout, arg, arg, arg);
	case  4: fmt.Fprintf(w, layout, arg, arg, arg, arg);
	default: panic("no support for more than 4 '%'-format chars yet");
	}
}


// TODO this should become a Go built-in
//
// push appends x to dst and returns the extended slice. The slice
// is never reallocated: push panics with "dst too small" if dst is
// already filled to capacity.
func push(dst []int, x int) []int {
	n := len(dst);
	// One more element is needed; the check must reject n == cap(dst),
	// otherwise the slice operation below fails with a bounds error
	// instead of the intended message.
	if n+1 > cap(dst) {
		panic("dst too small");
	}
	dst = dst[0 : n+1];
	dst[n] = x;
	return dst;
}


// append copies src to the end of dst and returns the extended slice.
// The slice is never reallocated: append panics with "dst too small"
// if the capacity of dst cannot hold the additional bytes.
func append(dst, src []byte) []byte {
	old := len(dst);
	total := old + len(src);
	if total > cap(dst) {
		panic("dst too small");
	}
	dst = dst[0 : total];
	for i, b := range src {
		dst[old+i] = b;
	}
	return dst;
}


// state holds the format in use and the current indentation while
// values are printed.
type state struct {
	f Format;

	// indentation
	indent_text []byte;  // text written after each newline; see printIndented
	indent_widths []int;  // stack of len(indent_text) values saved by indent, restored by outdent
}


// init prepares ps for printing with format f, starting without
// indentation. The indentation buffers have a fixed capacity.
func (ps *state) init(f Format) {
	ps.indent_widths = make([]int, 0, 100);  // TODO don't use fixed cap
	ps.indent_text = make([]byte, 0, 1000);  // TODO don't use fixed cap
	ps.f = f;
}


// indent extends the current indentation by text. The length of the
// indentation before the extension is saved so that outdent can
// restore it.
func (ps *state) indent(text []byte) {
	width := len(ps.indent_text);
	ps.indent_widths = push(ps.indent_widths, width);
	ps.indent_text = append(ps.indent_text, text);
}


// outdent undoes the most recent indent. It does nothing if there
// is no indentation to undo.
func (ps *state) outdent() {
	depth := len(ps.indent_widths);
	if depth == 0 {
		return;
	}
	top := depth-1;
	ps.indent_text = ps.indent_text[0 : ps.indent_widths[top]];
	ps.indent_widths = ps.indent_widths[0 : top];
}


// printIndented writes s to w, inserting the current indentation
// text after each '\n' in s (including a '\n' at the very end).
func (ps *state) printIndented(w io.Writer, s []byte) {
	start := 0;  // beginning of the text not yet written
	for i := 0; i < len(s); i++ {
		if s[i] != '\n' {
			continue;
		}
		// write the line including its '\n', then the indentation
		end := i+1;
		w.Write(s[start : end]);
		w.Write(ps.indent_text);
		start = end;
	}
	// remaining text without trailing newline
	w.Write(s[start : len(s)]);
}


// printf formats value according to format (see rawPrintf) and
// writes the result to w, applying the current indentation, if any.
func (ps *state) printf(w io.Writer, format []byte, value reflect.Value) {
	if len(ps.indent_widths) != 0 {
		// format into a temporary buffer so that the
		// indentation can be applied to the result
		var tmp io.ByteBuffer;
		rawPrintf(&tmp, format, value);
		ps.printIndented(w, tmp.Data());
		return;
	}

	// no indentation
	rawPrintf(w, format, value);
}


// Forward declaration: print and print0 call each other.
func (ps *state) print(w io.Writer, fexpr expr, value reflect.Value, index, level int) bool

// print0 prints value to w as described by the expression fexpr.
// index selects the array element used when a "*" field is applied
// to an array; level is the nesting depth, which is only passed on
// to print for tracing. A nil fexpr prints nothing and yields true.
//
// Returns true if a non-empty field value was found.
func (ps *state) print0(w io.Writer, fexpr expr, value reflect.Value, index, level int) bool {
	if fexpr == nil {
		return true;
	}

	switch t := fexpr.(type) {
	case *alternative:
		// - print the contents of the first alternative with a non-empty field
		// - result is true if there is at least one non-empty field
		// - each alternative is printed into a buffer first so that its
		//   output can be discarded if it turns out to be empty
		// - note that the alternatives are evaluated with index 0
		var buf io.ByteBuffer;
		if ps.print(&buf, t.x, value, 0, level) {
			w.Write(buf.Data());
			return true;
		} else {
			var buf io.ByteBuffer;
			if ps.print(&buf, t.y, value, 0, level) {
				w.Write(buf.Data());
				return true;
			}
		}
		return false;

	case *sequence:
		// - print the contents of the sequence
		// - result is true if there is no empty field
		// - both operands are printed directly to w, even if
		//   one of them turns out to be empty
		// TODO do we need to buffer here? why not?
		b1 := ps.print(w, t.x, value, index, level);
		b2 := ps.print(w, t.y, value, index, level);
		return b1 && b2;

	case *literal:
		// - print the literal
		// - result is always true (literal is never empty)
		ps.printf(w, t.value, value);
		return true;

	case *indentation:
		// - nothing is written to w; only the indentation state changes
		// - result is always true
		if t.iexpr != nil {
			// indent by the text produced by the indentation expression
			var buf io.ByteBuffer;
			ps.print(&buf, t.iexpr, value, index, level);
			ps.indent(buf.Data());

		} else {
			// outdent
			ps.outdent();
		}
		return true;

	case *field:
		// - print the contents of the field
		// - format is either the field format or the type-specific format
		// - TODO look at field tag for default format
		// - result is true if the field is not empty
		switch t.fname {
		case "^":
			// identity - value doesn't change

		case "*":
			// indirect
			switch v := value.(type) {
			case reflect.ArrayValue:
				// select the element at index; the array is
				// "empty" once index is past its end
				if v.Len() <= index {
					return false;
				}
				value = v.Elem(index);

			case reflect.MapValue:
				panic("reflection support for maps incomplete");

			case reflect.PtrValue:
				// a nil pointer is an empty field
				if v.Get() == nil {
					return false;
				}
				value = v.Sub();

			case reflect.InterfaceValue:
				// a nil interface is an empty field
				if v.Get() == nil {
					return false;
				}
				value = v.Value();

			default:
				// TODO fix this
				panic(fmt.Sprintf("error: * does not apply to `%s`\n", value.Type().Name()));
			}

		default:
			// field
			if s, is_struct := value.(reflect.StructValue); is_struct {
				value = getField(s, t.fname);
			} else {
				// TODO fix this
				panic(fmt.Sprintf("error: %s has no field `%s`\n", value.Type().Name(), t.fname));
			}
		}

		// determine format: an explicit production name given with
		// the field takes precedence over the type name of the value
		tname := t.tname;
		if tname == "" {
			tname = typename(value)
		}
		fexpr = ps.f.getFormat(tname, value);

		return ps.print(w, fexpr, value, index, level);

	case *negation:
		// TODO is this operation useful at all?
		// print the contents of the negation if it contains an empty field;
		// the result is always true
		var buf io.ByteBuffer;
		if !ps.print(&buf, t.neg, value, 0, level) {
			w.Write(buf.Data());
		}
		return true;

	case *option:
		// print the contents of the option if it contains a non-empty field;
		// the result is always true
		var buf io.ByteBuffer;
		if ps.print(&buf, t.opt, value, 0, level) {
			w.Write(buf.Data());
		}
		return true;

	case *repetition:
		// print the contents of the repetition while there is a non-empty field;
		// the repeated expression is evaluated with index 0, 1, 2, ... and
		// the divider is printed before every repetition but the first;
		// the result is always true
		var buf io.ByteBuffer;
		for i := 0; ps.print(&buf, t.rep, value, i, level); i++ {
			if i > 0 {
				ps.print(w, t.div, value, i, level);
			}
			w.Write(buf.Data());
			buf.Reset();
		}
		return true;

	case *custom:
		// call the custom formatter; its output is indented like
		// any other output and only written if it reports success
		var buf io.ByteBuffer;
		if t.form(&buf, value.Interface(), t.name) {
			ps.printIndented(w, buf.Data());
			return true;
		}
		return false;
	}

	panic("unreachable");
	return false;
}


// printTrace writes a trace message to standard output. The message
// is preceded by 2*indent characters of a ". . ." pattern so that
// nested calls line up.
func printTrace(indent int, format string, a ...) {
	const dots =
		". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
		". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ";
	const n = len(dots);
	width := 2*indent;
	// emit the pattern in full pieces while more than n characters remain
	for width > n {
		fmt.Print(dots);
		width -= n;
	}
	fmt.Print(dots[0 : width]);
	fmt.Printf(format, a);
}


// print prints value as described by fexpr by calling print0 at the
// next nesting level, and returns its result. In trace mode, the
// call is bracketed by trace output showing the expression, the
// index, and the result.
func (ps *state) print(w io.Writer, fexpr expr, value reflect.Value, index, level int) bool {
	if *trace {
		printTrace(level, "%v, %d {\n", fexpr, /*value.Interface(), */index);
	}

	nonempty := ps.print0(w, fexpr, value, index, level+1);
	if *trace {
		printTrace(level, "} %v\n", nonempty);
	}

	return nonempty;
}


// TODO proper error reporting

// Fprint formats each argument according to the format f
// and writes to w. Every argument is printed with a fresh
// state, i.e., without indentation carried over from the
// previous argument.
//
func (f Format) Fprint(w io.Writer, args ...) {
	// the arguments arrive packaged as the fields of a struct
	value := reflect.NewValue(args).(reflect.StructValue);
	for i := 0; i < value.Len(); i++ {
		arg := value.Field(i);
		fexpr := f.getFormat(typename(arg), arg);

		var ps state;
		ps.init(f);
		ps.print(w, fexpr, arg, 0, 0);
	}
}


// Print is like Fprint but writes to standard output.
//
func (f Format) Print(args ...) {
	var out io.Writer = os.Stdout;
	f.Fprint(out, args);
}


// Sprint is like Fprint but returns the formatted
// arguments as a string.
//
func (f Format) Sprint(args ...) string {
	var buf io.ByteBuffer;
	f.Fprint(&buf, args);
	text := string(buf.Data());
	return text;
}