From 3aa892c4f9187c619091d309e7b3da6dfd14f854 Mon Sep 17 00:00:00 2001
From: Robert Griesemer <gri@golang.org>
Date: Fri, 24 Apr 2009 17:22:58 -0700
Subject: [PATCH] daily snapshot: - more work on template-driven ast printing

R=r
OCL=27851
CL=27851
---
 usr/gri/pretty/ast.txt   |  46 ++++-
 usr/gri/pretty/format.go | 429 ++++++++++++++++++++++++++-------------
 2 files changed, 320 insertions(+), 155 deletions(-)

diff --git a/usr/gri/pretty/ast.txt b/usr/gri/pretty/ast.txt
index a4f62f3b8dd..998352ccaa3 100644
--- a/usr/gri/pretty/ast.txt
+++ b/usr/gri/pretty/ast.txt
@@ -1,14 +1,40 @@
-// TODO prefix decl doesn't work
-//ast .
+//string =
+//	"%s" ;
 
-ast.Ident =
-	Value .
+pointer =
+	^ ;
 
-ast.Program =
-	"package " Name "\n" { Decls "\n\n" } .
+array =
+	^ ;
 
-ast.GenDecl =
-	"def " .
+//token.Token =
+//	"token<%d>" ;  // this should be a Go-installed formatter
 
-ast.FuncDecl =
-	"func " .
\ No newline at end of file
+ast
+	;
+
+Comments =
+	"comments\n" ;
+
+Ident =
+	Value ;
+
+Program =
+	"package " Name "\n\n" { Decls "\n\n" } ;
+
+GenDecl =
+	Doc
+	Tok " (\n"
+	")\n";
+
+FuncType =
+	"(" { Params } ")" ;
+
+BlockStmt =
+	"{\n" "}\n" ;
+
+FuncDecl =
+	"func " Name Type [ " " Body ] ;
+	
+Decl =
+	^ ;
\ No newline at end of file
diff --git a/usr/gri/pretty/format.go b/usr/gri/pretty/format.go
index 35c3bc78ae2..d069ef4c8be 100644
--- a/usr/gri/pretty/format.go
+++ b/usr/gri/pretty/format.go
@@ -17,39 +17,88 @@ import (
 // -----------------------------------------------------------------------------
 // Format
 
-// node kind
-const (
-	self = iota;
-	alternative;
-	sequence;
-	field;
-	literal;
-	option;
-	repetition;
+// A production expression is built from the following nodes.
+//
+type (
+	expr interface {
+		implements_expr();
+	};
+
+	empty struct {
+	};
+
+	alternative struct {
+		x, y expr;
+	};
+
+	sequence struct {
+		x, y expr;
+	};
+
+	field struct {
+		name string;  // including "^", "*"
+		format expr;  // nil if no format specified
+	};
+	
+	literal struct {
+		// TODO should there be other types or should it all be string literals?
+		value []byte;
+	};
+
+	option struct {
+		x expr
+	};
+
+	repetition struct {
+		x expr
+	};
+
+	// TODO custom formats are not yet used
+	custom struct {
+		name string;
+		f func(w io.Write, value interface{}, name string) bool
+	};
 )
 
 
-type node struct {
-	kind int;
-	name string;  // field name
-	value []byte;  // literal value
-	x, y *node;
-}
+// These methods are used to enforce the "implements" relationship for
+// better compile-time type checking.
+//
+// TODO If we had a basic accessor mechanism in the language (a field
+// "f T" automatically implements a corresponding accessor "f() T"), this
+// could be expressed more easily by simply providing the field.
+//
+func (x *empty) implements_expr()  {}
+func (x *alternative) implements_expr()  {}
+func (x *sequence) implements_expr()  {}
+func (x *field) implements_expr()  {}
+func (x *literal) implements_expr()  {}
+func (x *option) implements_expr()  {}
+func (x *repetition) implements_expr()  {}
+func (x *custom) implements_expr()  {}
 
 
-// A Format is a set of production nodes.
-type Format map [string] *node;
+// A Format is a set of production expressions.
+type Format map [string] expr;
 
 
 // -----------------------------------------------------------------------------
 // Parsing
 
+/*	TODO
+	- EBNF vs Kleene notation
+	- default formatters for basic types (may imply scopes so we can override)
+	- installable custom formatters (like for template.go)
+	- format strings
+*/
+
 /*	Format      = { Production } .
-	Production  = DottedName [ "=" Expression ] "." .
-	DottedName  = name { "." name } .
+	Production  = Name [ "=" [ Expression ] ] ";" .
+	Name        = identifier { "." identifier } .
 	Expression  = Term { "|" Term } .
 	Term        = Factor { Factor } .
-	Factor      = "*" | name | string_literal | Group | Option | Repetition .
+	Factor      = string_literal | Field | Group | Option | Repetition .
+	Field       = ( "^" | "*" | Name ) [ ":" Expression ] .
 	Group       = "(" Expression ")" .
 	Option      = "[" Expression "]" .
 	Repetition  = "{" Expression "}" .
@@ -109,30 +158,30 @@ func (p *parser) expect(tok token.Token) token.Position {
 }
 
 
-func (p *parser) parseName() string {
+func (p *parser) parseIdentifier() string {
 	name := string(p.lit);
 	p.expect(token.IDENT);
 	return name;
 }
 
 
-func (p *parser) parseDottedName() string {
-	name := p.parseName();
+func (p *parser) parseName() string {
+	name := p.parseIdentifier();
 	for p.tok == token.PERIOD {
 		p.next();
-		name = name + "." + p.parseName();
+		name = name + "." + p.parseIdentifier();
 	}
 	return name;
 }
 
 
-// TODO should have WriteByte in ByteBuffer instead!
-var (
-	newlineByte = []byte{'\n'};
-	tabByte = []byte{'\t'};
-)
+// TODO WriteByte should be a ByteBuffer method
+func writeByte(buf *io.ByteBuffer, b byte) {
+	buf.Write([]byte{b});
+}
 
 
+// TODO make this complete
 func escapeString(s []byte) []byte {
 	// the string syntax is correct since it comes from the scannner
 	var buf io.ByteBuffer;
@@ -141,14 +190,13 @@ func escapeString(s []byte) []byte {
 		if s[i] == '\\' {
 			buf.Write(s[i0 : i]);
 			i++;
+			var esc byte;
 			switch s[i] {
-			case 'n':
-				buf.Write(newlineByte);
-			case 't':
-				buf.Write(tabByte);
-			default:
-				panic("unhandled escape:", string(s[i]));
+			case 'n': esc = '\n';
+			case 't': esc = '\t';
+			default: panic("unhandled escape:", string(s[i]));
 			}
+			writeByte(&buf, esc);
 			i++;
 			i0 = i;
 		} else {
@@ -182,32 +230,54 @@ func (p *parser) parseValue() []byte {
 }
 
 
-func (p *parser) parseExpression() *node
+func (p *parser) parseExpr() expr
 
-func (p *parser) parseFactor() (x *node) {
+func (p *parser) parseField() expr {
+	var name string;
 	switch p.tok {
+	case token.XOR:
+		name = "^";
+		p.next();
 	case token.MUL:
-		x = &node{self, "", nil, nil, nil};
-
+		name = "*";
+		p.next();
 	case token.IDENT:
-		x = &node{field, p.parseName(), nil, nil, nil};
+		name = p.parseName();
+	default:
+		panic("unreachable");
+	}
+
+	var format expr;
+	if p.tok == token.COLON {
+		p.next();
+		format = p.parseExpr();
+	}
+	
+	return &field{name, format};
+}
+
+
+func (p *parser) parseFactor() (x expr) {
+	switch p.tok {
+	case token.XOR, token.MUL, token.IDENT:
+		x = p.parseField();
 
 	case token.STRING:
-		x = &node{literal, "", p.parseValue(), nil, nil};
+		x = &literal{p.parseValue()};
 
 	case token.LPAREN:
 		p.next();
-		x = p.parseExpression();
+		x = p.parseExpr();
 		p.expect(token.RPAREN);
 
 	case token.LBRACK:
 		p.next();
-		x = &node{option, "", nil, p.parseExpression(), nil};
+		x = &option{p.parseExpr()};
 		p.expect(token.RBRACK);
 
 	case token.LBRACE:
 		p.next();
-		x = &node{repetition, "", nil, p.parseExpression(), nil};
+		x = &repetition{p.parseExpr()};
 		p.expect(token.RBRACE);
 
 	default:
@@ -219,46 +289,52 @@ func (p *parser) parseFactor() (x *node) {
 }
 
 
-func (p *parser) parseTerm() *node {
+func (p *parser) parseTerm() expr {
 	x := p.parseFactor();
 
-	for	p.tok == token.IDENT ||
+	for	p.tok == token.XOR ||
+		p.tok == token.MUL ||
+		p.tok == token.IDENT ||
 		p.tok == token.STRING ||
 		p.tok == token.LPAREN ||
 		p.tok == token.LBRACK ||
 		p.tok == token.LBRACE
 	{
 		y := p.parseFactor();
-		x = &node{sequence, "", nil, x, y};
+		x = &sequence{x, y};
 	}
 
 	return x;
 }
 
 
-func (p *parser) parseExpression() *node {
+func (p *parser) parseExpr() expr {
 	x := p.parseTerm();
 
 	for p.tok == token.OR {
 		p.next();
 		y := p.parseTerm();
-		x = &node{alternative, "", nil, x, y};
+		x = &alternative{x, y};
 	}
 
 	return x;
 }
 
 
-func (p *parser) parseProduction() (string, *node) {
-	name := p.parseDottedName();
+func (p *parser) parseProduction() (string, expr) {
+	name := p.parseName();
 	
-	var x *node;
+	var x expr;
 	if p.tok == token.ASSIGN {
 		p.next();
-		x = p.parseExpression();
+		if p.tok == token.SEMICOLON {
+			x = &empty{};
+		} else {
+			x = p.parseExpr();
+		}
 	}
 
-	p.expect(token.PERIOD);
+	p.expect(token.SEMICOLON);
 
 	return name, x;
 }
@@ -365,118 +441,181 @@ func getField(v reflect.StructValue, fieldname string) reflect.Value {
 }
 
 
-func (f Format) apply(w io.Write, v reflect.Value) bool
+func typename(value reflect.Value) string {
+	name := value.Type().Name();
+
+	if name != "" {
+		return name;
+	}
+
+	switch value.Kind() {
+	case reflect.ArrayKind: name = "array";
+	case reflect.BoolKind: name = "bool";
+	case reflect.ChanKind: name = "chan";
+	case reflect.DotDotDotKind: name = "...";
+	case reflect.FloatKind: name = "float";
+	case reflect.Float32Kind: name = "float32";
+	case reflect.Float64Kind: name = "float64";
+	case reflect.FuncKind: name = "func";
+	case reflect.IntKind: name = "int";
+	case reflect.Int16Kind: name = "int16";
+	case reflect.Int32Kind: name = "int32";
+	case reflect.Int64Kind: name = "int64";
+	case reflect.Int8Kind: name = "int8";
+	case reflect.InterfaceKind: name = "interface";
+	case reflect.MapKind: name = "map";
+	case reflect.PtrKind: name = "pointer";
+	case reflect.StringKind: name = "string";
+	case reflect.StructKind: name = "struct";
+	case reflect.UintKind: name = "uint";
+	case reflect.Uint16Kind: name = "uint16";
+	case reflect.Uint32Kind: name = "uint32";
+	case reflect.Uint64Kind: name = "uint64";
+	case reflect.Uint8Kind: name = "uint8";
+	case reflect.UintptrKind: name = "uintptr";
+	}
+	
+	return name;
+}
+
+
+var defaultFormat = &literal{io.StringBytes("%v")};
+
+func (f Format) getFormat(value reflect.Value) expr {
+	if format, found := f[typename(value)]; found {
+		return format;
+	}
+	// no format found
+	return defaultFormat;
+}
+
+
+// Count the number of printf-style '%' formatters in s.
+// The result is 0, 1, or 2 (where 2 stands for 2 or more).
+//
+func percentCount(s []byte) int {
+	n := 0;
+	for i := 0; n < 2 && i < len(s); i++ {
+		// TODO should not count "%%"'s
+		if s[i] == '%' {
+			n++;
+		}
+	}
+	return n;
+}
+
+
+func printf(w io.Write, format []byte, value reflect.Value) {
+	// TODO this seems a bit of a hack
+	if percentCount(format) == 1 {
+		// exactly one '%' format specifier - try to use it
+		fmt.Fprintf(w, string(format), value.Interface());
+	} else {
+		// 0 or more than 1 '%' format specifier - ignore them
+		w.Write(format);
+	}
+}
+
 
 // Returns true if a non-empty field value was found.
-func (f Format) print(w io.Write, x *node, v reflect.Value, index int) bool {
-	switch x.kind {
-	case self:
-		panic("self");
+func (f Format) print(w io.Write, format expr, value reflect.Value, index int) bool {
+	switch t := format.(type) {
+	case *empty:
+		return true;
 
-	case alternative:
+	case *alternative:
 		// print the contents of the first alternative with a non-empty field
 		var buf io.ByteBuffer;
-		if !f.print(&buf, x.x, v, -1) {
-			f.print(&buf, x.y, v, -1);
+		b := f.print(&buf, t.x, value, index);
+		if !b {
+			b = f.print(&buf, t.y, value, index);
 		}
-		w.Write(buf.Data());
-
-	case sequence:
-		f.print(w, x.x, v, -1);
-		f.print(w, x.y, v, -1);
-
-	case field:
-		if sv, is_struct := v.(reflect.StructValue); is_struct {
-			return f.apply(w, getField(sv, x.name));
-		} else {
-			panicln("not in a struct - field:", x.name);
-		}
-
-	case literal:
-		w.Write(x.value);
-
-	case option:
-		// print the contents of the option if there is a non-empty field
-		var buf io.ByteBuffer;
-		if f.print(&buf, x.x, v, -1) {
+		if b {
 			w.Write(buf.Data());
 		}
+		return index < 0 || b;
 
-	case repetition:
+	case *sequence:
+		b1 := f.print(w, t.x, value, index);
+		b2 := f.print(w, t.y, value, index);
+		return index < 0 || b1 && b2;
+
+	case *field:
+		var x reflect.Value;
+		switch t.name {
+		case "^":
+			if v, is_ptr := value.(reflect.PtrValue); is_ptr {
+				if v.Get() == nil {
+					return false;
+				}
+				x = v.Sub();
+			} else if v, is_array := value.(reflect.ArrayValue); is_array {
+				if index < 0 || v.Len() <= index {
+					return false;
+				}
+				x = v.Elem(index);
+			} else if v, is_interface := value.(reflect.InterfaceValue); is_interface {
+				if v.Get() == nil {
+					return false;
+				}
+				x = v.Value();
+			} else {
+				panic("not a ptr, array, or interface");  // TODO fix this
+			}
+		case "*":
+			x = value;
+		default:
+			if v, is_struct := value.(reflect.StructValue); is_struct {
+				x = getField(v, t.name);
+			} else {
+				panic ("not a struct");  // TODO fix this
+			}
+		}
+		format = t.format;
+		if format == nil {
+			format = f.getFormat(x);
+		}
+		b := f.print(w, format, x, index);
+		return index < 0 || b;
+
+	case *literal:
+		printf(w, t.value, value);
+		return true;
+
+	case *option:
+		// print the contents of the option if there is a non-empty field
+		var buf io.ByteBuffer;
+		b := f.print(&buf, t.x, value, -1);
+		if b {
+			w.Write(buf.Data());
+		}
+		return index < 0 || b;
+
+	case *repetition:
 		// print the contents of the repetition while there is a non-empty field
+		b := false;
 		for i := 0; ; i++ {
 			var buf io.ByteBuffer;
-			if f.print(&buf, x.x, v, i) {
+			if f.print(&buf, t.x, value, i) {
 				w.Write(buf.Data());
+				b = true;
 			} else {
 				break;
 			}
 		}
-
-	default:
-		panic("unreachable");
+		return index < 0 || b;
+		
+	case *custom:
+		b := t.f(w, value.Interface(), t.name);
+		return index < 0 || b;
 	}
-
+	
+	panic("unreachable");
 	return false;
 }
 
 
-func (f Format) Dump() {
-	for name, x := range f {
-		println(name, x);
-	}
-}
-
-
-func (f Format) apply(w io.Write, v reflect.Value) bool {
-	println("apply typename:", v.Type().Name());
-
-	if x, found := f[v.Type().Name()]; found {
-		// format using corresponding production
-		f.print(w, x, v, -1);
-		
-	} else {
-		// format using default formats
-		switch x := v.(type) {
-		case reflect.ArrayValue:
-			if x.Len() == 0 {
-				return false;
-			}
-			for i := 0; i < x.Len(); i++ {
-				f.apply(w, x.Elem(i));
-			}
-
-		case reflect.StringValue:
-			w.Write(io.StringBytes(x.Get()));
-
-		case reflect.IntValue:
-			// TODO is this the correct way to check the right type?
-			// or should it be t, ok := x.Interface().(token.Token) instead?
-			if x.Type().Name() == "token.Token" {
-				fmt.Fprintf(w, "%s", token.Token(x.Get()).String());
-			} else {
-				fmt.Fprintf(w, "%d", x.Get());
-			}
-
-		case reflect.InterfaceValue:
-			f.apply(w, x.Value());
-
-		case reflect.PtrValue:
-			// TODO is this the correct way to check nil ptr?
-			if x.Get() == nil {
-				return false;
-			}
-			return f.apply(w, x.Sub());
-
-		default:
-			panicln("unsupported kind:", v.Kind());
-		}
-	}
-
-	return true;
-}
-
-
 func (f Format) Apply(w io.Write, data interface{}) {
-	f.apply(w, reflect.NewValue(data));
+	value := reflect.NewValue(data);
+	f.print(w, f.getFormat(value), value, -1);
 }