1
0
mirror of https://github.com/golang/go synced 2024-11-20 08:14:41 -07:00
go/usr/gri/pretty/format.go
Robert Griesemer f8ff3b1055 daily snapshot:
- more work on template-driven ast formatting
- added preliminary test suite
- added documentation

TBR=r
OCL=27858
CL=27858
2009-04-25 16:36:17 -07:00

741 lines
16 KiB
Go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/* The format package implements syntax-directed formatting of arbitrary
data structures.
A format specification consists of a set of named productions in EBNF.
The production names correspond to the type names of the data structure
to be printed. The production expressions consist of literal values
(strings), references to fields, and alternative, grouped, optional,
and repetitive sub-expressions.
When printing a value, its type name is used to lookup the production
to be printed. Literal values are printed as is, field references are
resolved and the respective field value is printed instead (using its
type-specific production), and alternative, grouped, optional, and
repetitive sub-expressions are printed depending on whether they contain
"empty" fields or not. A field is empty if its value is nil.
*/
package format
import (
"fmt";
"go/scanner";
"go/token";
"io";
"os";
"reflect";
"strconv";
)
// ----------------------------------------------------------------------------
// Format representation
// A production expression is built from the following nodes.
//
type (
expr interface {
String() string;
};
alternative struct {
x, y expr;
};
sequence struct {
x, y expr;
};
field struct {
name string; // including "^", "*"
fexpr expr; // nil if no fexpr specified
};
literal struct {
// TODO should there be other types or should it all be string literals?
value []byte;
};
option struct {
x expr
};
repetition struct {
x expr
};
// TODO custom formats are not yet used
custom struct {
name string;
f func(w io.Write, value interface{}, name string) bool
};
)
// TODO If we had a basic accessor mechanism in the language (a field
// "f T" automatically implements a corresponding accessor "f() T", this
// could be expressed more easily by simply providing the field.
//
func (x *alternative) String() string {
return fmt.Sprintf("(%v | %v)", x.x, x.y);
}
func (x *sequence) String() string {
return fmt.Sprintf("%v %v", x.x, x.y);
}
func (x *field) String() string {
if x.fexpr == nil {
return x.name;
}
return fmt.Sprintf("%s: (%v)", x.name, x.fexpr);
}
func (x *literal) String() string {
return strconv.Quote(string(x.value));
}
func (x *option) String() string {
return fmt.Sprintf("[%v]", x.x);
}
func (x *repetition) String() string {
return fmt.Sprintf("{%v}", x.x);
}
func (x *custom) String() string {
return fmt.Sprintf("<custom %s>", x.name);
}
/* A Format is a set of production expressions. A new format is
created explicitly by calling Parse, or implicitly by one of
the Xprintf functions.
Formatting rules are specified in the following syntax:
Format = { Production } .
Production = Name [ "=" [ Expression ] ] ";" .
Name = identifier { "." identifier } .
Expression = Term { "|" Term } .
Term = Factor { Factor } .
Factor = string_literal | Field | Group | Option | Repetition .
Field = ( "^" | "*" | Name ) [ ":" Expression ] .
Group = "(" Expression ")" .
Option = "[" Expression "]" .
Repetition = "{" Expression "}" .
The syntax of white space, comments, identifiers, and string literals is
the same as in Go.
A production name corresponds to a Go type name of the form
PackageName.TypeName
(for instance format.Format). A production of the form
Name;
specifies a package name which is prepended to all subsequent production
names:
format;
Format = ... // this production matches the type format.Format
The basic operands of productions are string literals, field names, and
designators. String literals are printed as is, unless they contain a
single %-style format specifier (such as "%d"). In that case, they are
used as the format for fmt.Printf, with the current value as argument.
The designator "^" stands for the current value; a "*" denotes indirection
(pointers, arrays, maps, and interfaces).
A field may contain a format specifier of the form
: Expression
which specifies the field format irrespective of the field type.
Default formats are used for types without specific formating rules:
The "%v" format is used for values of all types expect pointer, array,
map, and interface types. They are using the "^" designator.
TODO complete this description
*/
type Format map [string] expr;
// ----------------------------------------------------------------------------
// Parsing
/* TODO
- installable custom formatters (like for template.go)
- have a format to select type name, field tag, field offset?
- use field tag as default format for that field
- field format override (":") is not working as it should
(cannot refer to another production - syntactially not possible
at the moment)
*/
type parser struct {
scanner scanner.Scanner;
// error handling
lastline int; // > 0 if there was any error
// next token
pos token.Position; // token position
tok token.Token; // one token look-ahead
lit []byte; // token literal
}
// The parser implements the scanner.ErrorHandler interface.
func (p *parser) Error(pos token.Position, msg string) {
if pos.Line != p.lastline {
// only report error if not on the same line as previous error
// in the hope to reduce number of follow-up errors reported
fmt.Fprintf(os.Stderr, "%d:%d: %s\n", pos.Line, pos.Column, msg);
}
p.lastline = pos.Line;
}
func (p *parser) next() {
p.pos, p.tok, p.lit = p.scanner.Scan();
}
func (p *parser) error_expected(pos token.Position, msg string) {
msg = "expected " + msg;
if pos.Offset == p.pos.Offset {
// the error happened at the current position;
// make the error message more specific
msg += ", found '" + p.tok.String() + "'";
if p.tok.IsLiteral() {
msg += " " + string(p.lit);
}
}
p.Error(pos, msg);
}
func (p *parser) expect(tok token.Token) token.Position {
pos := p.pos;
if p.tok != tok {
p.error_expected(pos, "'" + tok.String() + "'");
}
p.next(); // make progress in any case
return pos;
}
func (p *parser) parseIdentifier() string {
name := string(p.lit);
p.expect(token.IDENT);
return name;
}
func (p *parser) parseName() string {
name := p.parseIdentifier();
for p.tok == token.PERIOD {
p.next();
name = name + "." + p.parseIdentifier();
}
return name;
}
// TODO WriteByte should be a ByteBuffer method
func writeByte(buf *io.ByteBuffer, b byte) {
buf.Write([]byte{b});
}
func (p *parser) parseValue() []byte {
if p.tok != token.STRING {
p.expect(token.STRING);
return nil; // TODO should return something else?
}
// TODO get rid of back-and-forth conversions
// (change value to string?)
s, err := strconv.Unquote(string(p.lit));
if err != nil {
panic("scanner error?");
}
p.next();
return io.StringBytes(s);
}
func (p *parser) parseExpr() expr
func (p *parser) parseField() expr {
var name string;
switch p.tok {
case token.XOR:
name = "^";
p.next();
case token.MUL:
name = "*";
p.next();
case token.IDENT:
name = p.parseName();
default:
return nil;
}
var fexpr expr;
if p.tok == token.COLON {
p.next();
fexpr = p.parseExpr();
}
return &field{name, fexpr};
}
func (p *parser) parseFactor() (x expr) {
switch p.tok {
case token.STRING:
x = &literal{p.parseValue()};
case token.LPAREN:
p.next();
x = p.parseExpr();
p.expect(token.RPAREN);
case token.LBRACK:
p.next();
x = &option{p.parseExpr()};
p.expect(token.RBRACK);
case token.LBRACE:
p.next();
x = &repetition{p.parseExpr()};
p.expect(token.RBRACE);
default:
x = p.parseField();
}
return x;
}
func (p *parser) parseTerm() expr {
x := p.parseFactor();
if x == nil {
p.error_expected(p.pos, "factor");
p.next(); // make progress
return nil;
}
for {
y := p.parseFactor();
if y == nil {
break;
}
x = &sequence{x, y};
}
return x;
}
func (p *parser) parseExpr() expr {
x := p.parseTerm();
for p.tok == token.OR {
p.next();
y := p.parseTerm();
x = &alternative{x, y};
}
return x;
}
func (p *parser) parseFormat() Format {
format := make(Format);
prefix := "";
for p.tok != token.EOF {
pos := p.pos;
name := p.parseName();
if p.tok == token.ASSIGN {
// production
p.next();
var x expr;
if p.tok != token.SEMICOLON {
x = p.parseExpr();
}
// add production to format
name = prefix + name;
if t, found := format[name]; !found {
format[name] = x;
} else {
p.Error(pos, "production already declared: " + name);
}
} else {
// prefix only
prefix = name + ".";
}
p.expect(token.SEMICOLON);
}
return format;
}
func readSource(src interface{}, err scanner.ErrorHandler) []byte {
errmsg := "invalid input type (or nil)";
switch s := src.(type) {
case string:
return io.StringBytes(s);
case []byte:
return s;
case *io.ByteBuffer:
// is io.Read, but src is already available in []byte form
if s != nil {
return s.Data();
}
case io.Read:
var buf io.ByteBuffer;
n, os_err := io.Copy(s, &buf);
if os_err == nil {
return buf.Data();
}
errmsg = os_err.String();
}
if err != nil {
// TODO fix this
panic();
//err.Error(noPos, errmsg);
}
return nil;
}
// TODO do better error handling
// Parse parses a set of format productions. The format src may be
// a string, a []byte, or implement io.Read. The result is a Format
// if no errors occured; otherwise Parse returns nil.
//
func Parse(src interface{}) Format {
// initialize parser
var p parser;
p.scanner.Init(readSource(src, &p), &p, false);
p.next();
f := p.parseFormat();
if p.lastline > 0 {
return nil; // src contains errors
}
return f;
}
// ----------------------------------------------------------------------------
// Formatting
func fieldIndex(v reflect.StructValue, fieldname string) int {
t := v.Type().(reflect.StructType);
for i := 0; i < v.Len(); i++ {
name, typ, tag, offset := t.Field(i);
if name == fieldname {
return i;
}
}
return -1;
}
func getField(v reflect.StructValue, fieldname string) reflect.Value {
i := fieldIndex(v, fieldname);
if i < 0 {
panicln("field not found:", fieldname);
}
return v.Field(i);
}
func typename(value reflect.Value) string {
name := value.Type().Name();
if name != "" {
return name;
}
switch value.Kind() {
case reflect.ArrayKind: name = "array";
case reflect.BoolKind: name = "bool";
case reflect.ChanKind: name = "chan";
case reflect.DotDotDotKind: name = "...";
case reflect.FloatKind: name = "float";
case reflect.Float32Kind: name = "float32";
case reflect.Float64Kind: name = "float64";
case reflect.FuncKind: name = "func";
case reflect.IntKind: name = "int";
case reflect.Int16Kind: name = "int16";
case reflect.Int32Kind: name = "int32";
case reflect.Int64Kind: name = "int64";
case reflect.Int8Kind: name = "int8";
case reflect.InterfaceKind: name = "interface";
case reflect.MapKind: name = "map";
case reflect.PtrKind: name = "pointer";
case reflect.StringKind: name = "string";
case reflect.StructKind: name = "struct";
case reflect.UintKind: name = "uint";
case reflect.Uint16Kind: name = "uint16";
case reflect.Uint32Kind: name = "uint32";
case reflect.Uint64Kind: name = "uint64";
case reflect.Uint8Kind: name = "uint8";
case reflect.UintptrKind: name = "uintptr";
}
return name;
}
var defaults = map [int] expr {
reflect.ArrayKind: &field{"*", nil},
reflect.MapKind: &field{"*", nil},
reflect.PtrKind: &field{"*", nil},
}
var catchAll = &literal{io.StringBytes("%v")};
func (f Format) getFormat(value reflect.Value) expr {
if fexpr, found := f[typename(value)]; found {
return fexpr;
}
// no fexpr found - return kind-specific default value, if any
if fexpr, found := defaults[value.Kind()]; found {
return fexpr;
}
return catchAll;
}
// Count the number of printf-style '%' formatters in s.
// The result is 0, 1, or 2 (where 2 stands for 2 or more).
//
func percentCount(s []byte) int {
n := 0;
for i := 0; n < 2 && i < len(s); i++ {
// TODO should not count "%%"'s
if s[i] == '%' {
n++;
}
}
return n;
}
func printf(w io.Write, format []byte, value reflect.Value) {
// TODO this seems a bit of a hack
if percentCount(format) == 1 {
// exactly one '%' format specifier - try to use it
fmt.Fprintf(w, string(format), value.Interface());
} else {
// 0 or more then 1 '%' format specifier - ignore them
w.Write(format);
}
}
// Returns true if a non-empty field value was found.
func (f Format) print(w io.Write, fexpr expr, value reflect.Value, index int) bool {
debug := false; // enable for debugging
if debug {
fmt.Printf("print(%v, = %v, %v, %d)\n", w, fexpr, value.Interface(), index);
}
if fexpr == nil {
return true;
}
switch t := fexpr.(type) {
case *alternative:
// - print the contents of the first alternative with a non-empty field
// - result is true if there is at least one non-empty field
b := false;
var buf io.ByteBuffer;
if f.print(&buf, t.x, value, index) {
w.Write(buf.Data());
b = true;
} else {
buf.Reset();
if f.print(&buf, t.y, value, 0) {
w.Write(buf.Data());
b = true;
}
}
return b;
case *sequence:
// - print the contents of the sequence
// - result is true if there is no empty field
// TODO do we need to buffer here? why not?
b1 := f.print(w, t.x, value, index);
b2 := f.print(w, t.y, value, index);
return b1 && b2;
case *field:
// - print the contents of the field
// - format is either the field format or the type-specific format
// - TODO look at field tag for default format
// - result is true if the field is not empty
switch t.name {
case "^":
// identity - value doesn't change
case "*":
// indirect
switch v := value.(type) {
case reflect.PtrValue:
if v.Get() == nil {
return false;
}
value = v.Sub();
case reflect.ArrayValue:
if index < 0 || v.Len() <= index {
return false;
}
value = v.Elem(index);
case reflect.MapValue:
panic("reflection support for maps incomplete");
case reflect.InterfaceValue:
if v.Get() == nil {
return false;
}
value = v.Value();
default:
panic("not a ptr, array, map, or interface"); // TODO fix this
}
default:
// field
if s, is_struct := value.(reflect.StructValue); is_struct {
value = getField(s, t.name);
} else {
panic ("not a struct"); // TODO fix this
}
}
// determine format
fexpr = t.fexpr;
if fexpr == nil {
// no field format - use type-specific format
fexpr = f.getFormat(value);
}
return f.print(w, fexpr, value, index);
// BUG (6g?) crash with code below
/*
var buf io.ByteBuffer;
if f.print(&buf, fexpr, value, index) {
w.Write(buf.Data());
return true;
}
return false;
*/
case *literal:
// - print the literal
// - result is always true (literal is never empty)
printf(w, t.value, value);
return true;
case *option:
// print the contents of the option if it contains a non-empty field
//var foobar bool; // BUG w/o this declaration the code works!!!
var buf io.ByteBuffer;
if f.print(&buf, t.x, value, 0) {
w.Write(buf.Data());
return true;
}
return false;
case *repetition:
// print the contents of the repetition while there is a non-empty field
b := false;
for i := 0; ; i++ {
var buf io.ByteBuffer;
if f.print(&buf, t.x, value, i) {
w.Write(buf.Data());
b = true;
} else {
break;
}
}
return b;
case *custom:
return t.f(w, value.Interface(), t.name);
}
panic("unreachable");
return false;
}
// TODO proper error reporting
// Fprint formats each argument according to the format f
// and writes to w.
//
func (f Format) Fprint(w io.Write, args ...) {
value := reflect.NewValue(args).(reflect.StructValue);
for i := 0; i < value.Len(); i++ {
fld := value.Field(i);
f.print(w, f.getFormat(fld), fld, -1);
}
}
// Fprint formats each argument according to the format f
// and writes to standard output.
//
func (f Format) Print(args ...) {
f.Print(os.Stdout, args);
}
// Fprint formats each argument according to the format f
// and returns the resulting string.
//
func (f Format) Sprint(args ...) string {
var buf io.ByteBuffer;
f.Fprint(&buf, args);
return string(buf.Data());
}