From 787dd4d3f358a3f9373b692a7493e4ef191b7513 Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Wed, 20 May 2009 17:09:59 -0700 Subject: [PATCH] semi-weekly snapshot: - format.go implementation complete - lots of documentation added (not quite complete) TBR=r OCL=29130 CL=29130 --- usr/gri/pretty/ast.txt | 20 +- usr/gri/pretty/format.go | 797 +++++++++++++++++++++------------- usr/gri/pretty/format_test.go | 7 +- usr/gri/pretty/pretty.go | 2 +- 4 files changed, 500 insertions(+), 326 deletions(-) diff --git a/usr/gri/pretty/ast.txt b/usr/gri/pretty/ast.txt index 6354a9ef0fe..85373d6b346 100644 --- a/usr/gri/pretty/ast.txt +++ b/usr/gri/pretty/ast.txt @@ -2,13 +2,13 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Format file for printing AST nodes (package "ast"). +// Format file for printing AST nodes. + +ast "ast"; +token "token"; // ---------------------------------------------------------------------------- -// Elementary types - -token.Token = - ^:string; +// Basic types, support rules array = *; @@ -31,10 +31,6 @@ empty = exists = *:empty; - -// ---------------------------------------------------------------------------- -// TODO these are implicit - only here for debugging - ast.Expr = *; @@ -44,9 +40,11 @@ ast.Stmt = ast.Decl = *; - // ---------------------------------------------------------------------------- -// Comments +// Tokens and comments + +token.Token = + ^:string; ast.Comment = Text:string [Text:isMultiLineComment "\n"]; diff --git a/usr/gri/pretty/format.go b/usr/gri/pretty/format.go index 238d5ca24d0..a671beba3ce 100644 --- a/usr/gri/pretty/format.go +++ b/usr/gri/pretty/format.go @@ -2,25 +2,29 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -/* The format package implements syntax-directed formatting of arbitrary - data structures. 
+/* The format package implements syntax-directed, type-driven formatting + of arbitrary data structures. Formatting a data structure consists of + two phases: first, a format specification is parsed (once per format) + which results in a "compiled" format. The format can then be used + repeatedly to print arbitrary values to an io.Writer. - A format specification consists of a set of named productions in EBNF. - The production names correspond to the type names of the data structure - to be printed. The production expressions consist of literal values - (strings), references to fields, and alternative, grouped, optional, - and repetitive sub-expressions. + A format specification consists of a set of named format rules in EBNF. + The rule names correspond to the type names of the data structure to be + formatted. Each format rule consists of literal values and struct field + names which are combined into sequences, alternatives, grouped, optional, + repeated, or indented sub-expressions. Additionally, format rules may be + specified via Go formatter functions. - When printing a value, its type name is used to look up the production - to be printed. Literal values are printed as is, field references are - resolved and the respective field values are printed instead (using their - type-specific productions), and alternative, grouped, optional, and - repetitive sub-expressions are printed depending on whether they contain - "empty" fields or not. A field is empty if its value is nil. + When formatting a value, its type name determines the format rule. The + syntax of the rule or the corresponding formatter function determines + if and how the value is formatted. A format rule may refer to a struct + field of the current value. In this case the same mechanism is applied + recursively to that field. 
*/ package format import ( + "container/vector"; "flag"; "fmt"; "go/scanner"; @@ -37,10 +41,20 @@ import ( // ---------------------------------------------------------------------------- // Format representation -type ( - Formatter func(w io.Writer, env, value interface{}, name string) bool; - FormatterMap map[string]Formatter; -) +// Custom formatters implement the Formatter function type. +// A formatter is invoked with a writer w, an environment env +// (provided to format.Fprint and simply passed through), the +// value to format, and the rule name under which the formatter +// was installed (the same formatter function may be installed +// under different names). +// +type Formatter func(w io.Writer, env, value interface{}, rule_name string) bool + + +// A FormatterMap is a set of custom formatters. +// It maps a rule name to a formatter. +// +type FormatterMap map [string] Formatter; // A production expression is built from the following nodes. @@ -48,21 +62,15 @@ type ( type ( expr interface {}; - alternative struct { - x, y expr; // x | y - }; + alternatives []expr; // x | y | z - sequence struct { - x, y expr; // x y - }; + sequence []expr; // x y z - literal struct { - value []byte; - }; + // a literal is represented as string or []byte field struct { - fname string; // including "^", "*" - tname string; // "" if no tname specified + field_name string; // including "^", "*" + rule_name string; // "" if no rule name specified }; indentation struct { @@ -78,131 +86,131 @@ type ( }; custom struct { - name string; + rule_name string; form Formatter }; ) -/* A Format is a set of production expressions. A new format is - created explicitly by calling Parse, or implicitly by one of - the Xprintf functions. +/* The syntax of a format specification is presented in the same EBNF + notation as used in the Go language spec. The syntax of white space, + comments, identifiers, and string literals is the same as in Go. 
- Formatting rules are specified in the following syntax: + A format specification consists of a possibly empty set of package + declarations and format rules: - Format = Production { ";" Production } [ ";" ] . - Production = ( Name | "default" | "/" ) "=" Expression . - Name = identifier { "." identifier } . - Expression = [ Term ] { "|" [ Term ] } . - Term = Factor { Factor } . - Factor = string_literal | Indent | Field | Group | Option | Repetition . - Field = ( "^" | "*" | Name ) [ ":" Name ] . - Indent = ">>" Factor Expression "<<" . + Format = [ Entry { ";" Entry } ] [ ";" ] . + Entry = PackageDecl | FormatRule . + + A package declaration binds a package name (such as 'ast') to a + package import path (such as '"go/ast"'). A package name must be + declared at most once. + + PackageDecl = PackageName ImportPath . + PackageName = identifier . + ImportPath = string . + + A format rule binds a rule name to a format expression. A rule name + may be a type name or one of the special names 'default' (denoting + the default rule) or '/' (denoting the global "divider" rule - see + below). A type name may be the name of a predeclared type ('int', + 'float32', etc.), the name of an anonymous composite type ('array', + 'pointer', etc.), or the name of a user-defined type qualified by + the corresponding package name (for instance 'ast.MapType'). The + package name must have been declared already. A rule name must be + declared at most once. + + FormatRule = RuleName "=" Expression . + RuleName = TypeName | "default" | "/" . + TypeName = [ PackageName "." ] identifier . + + A format expression specifies how a value is to be formatted. In its + most general form, a format expression is a set of alternatives separated + by "|". Each alternative and the entire expression may be empty. + + Expression = [ Sequence ] { "|" [ Sequence ] } . + Sequence = Operand { Operand } . + Operand = Literal | Field | Indentation | Group | Option | Repetition . + + Literal = string . 
+ Field = FieldName [ ":" RuleName ] . + FieldName = identifier | "^" | "*" . + + Indentation = ">>" Operand Expression "<<" . Group = "(" Expression ")" . Option = "[" Expression "]" . Repetition = "{" Expression [ "/" Expression ] "}" . - The syntax of white space, comments, identifiers, and string literals is - the same as in Go. - - A production name corresponds to a Go type name of the form - - PackageName.TypeName - - (for instance format.Format). A production of the form - - Name; - - specifies a package name which is prepended to all subsequent production - names: - - format; - Format = ... // this production matches the type format.Format - - The basic operands of productions are string literals, field names, and - designators. String literals are printed as is, unless they contain a - single %-style format specifier (such as "%d"). In that case, they are - used as the format for fmt.Printf, with the current value as argument. - - The designator "^" stands for the current value; a "*" denotes indirection - (pointers, arrays, maps, and interfaces). - - A field may contain a format specifier of the form - - : Name - - which specifies the field format rule irrespective of the field type. - - TODO complete this description + TODO complete this comment */ -type Format struct { - // TODO(gri) Eventually have import path info here - // once reflect provides import paths. - rules map [string] expr; -} - +type Format map [string] expr; // ---------------------------------------------------------------------------- // Error handling -// Error implements an os.Error that may be returned as a -// result of calling Parse or any of the print functions. -// +// Error describes an individual error. The position Pos, if valid, +// indicates the format source position the error relates to. The +// error is specified with the Msg string. 
+// type Error struct { - Pos token.Position; // source position, if any (otherwise Pos.Line == 0) - Msg string; // error message - Next *Error; // next error, if any (or nil) + Pos token.Position; + Msg string; } -// String converts a list of Error messages into a string, -// with one error per line. -// +// Error implements the os.Error interface. func (e *Error) String() string { - var buf io.ByteBuffer; - for ; e != nil; e = e.Next { - if e.Pos.Line > 0 { - fmt.Fprintf(&buf, "%d:%d: ", e.Pos.Line, e.Pos.Column); - } - fmt.Fprintf(&buf, "%s\n", e.Msg); + pos := ""; + if e.Pos.IsValid() { + pos = fmt.Sprintf("%d:%d: ", e.Pos.Line, e.Pos.Column); } - return string(buf.Data()); + return pos + e.Msg; +} + + +// Multiple parser errors are returned as an ErrorList. +type ErrorList []*Error + + +// ErrorList implements the SortInterface. +func (p ErrorList) Len() int { return len(p); } +func (p ErrorList) Swap(i, j int) { p[i], p[j] = p[j], p[i]; } +func (p ErrorList) Less(i, j int) bool { return p[i].Pos.Offset < p[j].Pos.Offset; } + + +// ErrorList implements the os.Error interface. +func (p ErrorList) String() string { + switch len(p) { + case 0: return "unspecified error"; + case 1: return p[0].String(); + } + return fmt.Sprintf("%s (and %d more errors)", p[0].String(), len(p) - 1); } // ---------------------------------------------------------------------------- // Parsing -/* TODO - - have a format to select type name, field tag, field offset? - - use field tag as default format for that field -*/ - type parser struct { - // scanning + errors vector.Vector; scanner scanner.Scanner; pos token.Position; // token position tok token.Token; // one token look-ahead lit []byte; // token literal - // errors - first, last *Error; + packs map [string] string; // PackageName -> ImportPath + rules Format; // RuleName -> Expression } -// The parser implements the scanner.ErrorHandler interface. +// The parser implements scanner.Error. 
func (p *parser) Error(pos token.Position, msg string) { - if p.last == nil || p.last.Pos.Line != pos.Line { - // only report error if not on the same line as previous error - // in the hope to reduce number of follow-up errors reported - err := &Error{pos, msg, nil}; - if p.last == nil { - p.first = err; - } else { - p.last.Next = err; - } - p.last = err; + // Don't collect errors that are on the same line as the previous error + // in the hope to reduce the number of spurious errors due to incorrect + // parser synchronization. + if p.errors.Len() == 0 || p.errors.Last().(*Error).Pos.Line != pos.Line { + p.errors.Push(&Error{pos, msg}); } } @@ -243,36 +251,118 @@ func (p *parser) parseIdentifier() string { } -func (p *parser) parseName() string { - name := p.parseIdentifier(); - for p.tok == token.PERIOD { +func (p *parser) parseTypeName() (string, bool) { + pos := p.pos; + name, is_ident := p.parseIdentifier(), true; + if p.tok == token.PERIOD { + // got a package name, lookup package + if import_path, found := p.packs[name]; found { + name = import_path; + } else { + p.Error(pos, "package not declared: " + name); + } p.next(); - name = name + "." + p.parseIdentifier(); + name, is_ident = name + "." + p.parseIdentifier(), false; } - return name; + return name, is_ident; } -func (p *parser) parseValue() []byte { - if p.tok != token.STRING { - p.expect(token.STRING); - return nil; // TODO should return something else? +// Parses a rule name and returns it. If the rule name is +// a package-qualified type name, the package name is resolved. +// The 2nd result value is true iff the rule name consists of a +// single identifier only (and thus could be a package name). 
+// +func (p *parser) parseRuleName() (string, bool) { + name, is_ident := "", false; + switch p.tok { + case token.IDENT: + name, is_ident = p.parseTypeName(); + case token.DEFAULT: + name = "default"; + p.next(); + case token.QUO: + name = "/"; + p.next(); + default: + p.error_expected(p.pos, "rule name"); + p.next(); // make progress in any case } + return name, is_ident; +} - // TODO get rid of back-and-forth conversions - // (change value to string?) - s, err := strconv.Unquote(string(p.lit)); - if err != nil { - panic("scanner error"); + +func asLiteral(x interface{}) expr { + s := x.(string); + if len(s) > 0 && s[0] == '%' { + // literals containing format characters are represented as strings + return s; } - - p.next(); + // all other literals are represented as []byte for faster writing return io.StringBytes(s); } -func (p *parser) parseFactor() (x expr) -func (p *parser) parseExpression() expr +func (p *parser) parseLiteral() expr { + if p.tok != token.STRING { + p.expect(token.STRING); + return ""; + } + + s, err := strconv.Unquote(string(p.lit)); + if err != nil { + panic("scanner error"); + } + p.next(); + + // A string literal may contain newline characters and %-format specifiers. + // To simplify and speed up printing of the literal, split it into segments + // that start with "\n" or "%" (but not "%%"), possibly followed by a last + // segment that starts with some other character. If there is more than one + // such segment, return a sequence of "simple" literals, otherwise just + // return the string. 
+ + // split string + var list vector.Vector; + list.Init(0); + i0 := 0; + for i := 0; i < len(s); i++ { + switch s[i] { + case '\n': + // next segment starts with '\n' + case '%': + if i+1 >= len(s) || s[i+1] == '%' { + i++; + continue; // "%%" is not a format-% + } + // next segment starts with '%' + default: + // all other cases do not split the string + continue; + } + // split off the current segment + if i0 < i { + list.Push(s[i0 : i]); + i0 = i; + } + } + // the final segment may start with any character + // (it is empty iff the string is empty) + list.Push(s[i0 : len(s)]); + + // no need for a sequence if there is only one segment + if list.Len() == 1 { + return asLiteral(list.At(0)); + } + + // convert list into a sequence + seq := make(sequence, list.Len()); + for i := 0; i < list.Len(); i++ { + seq[i] = asLiteral(list.At(i)); + } + return seq; +} + func (p *parser) parseField() expr { var fname string; @@ -284,34 +374,36 @@ func (p *parser) parseField() expr { fname = "*"; p.next(); case token.IDENT: - // TODO use reflect.ExpandType() to lookup a field - // during parse-time if posssible - fname = p.parseName(); + // TODO(gri) could use reflect.ExpandType() to lookup a field + // at parse-time - would provide "compile-time" errors and + // faster printing. 
+ fname = p.parseIdentifier(); default: return nil; } - var tname string; + var rule_name string; if p.tok == token.COLON { p.next(); - tname = p.parseName(); + var _ bool; + rule_name, _ = p.parseRuleName(); } - return &field{fname, tname}; + return &field{fname, rule_name}; } -func (p *parser) parseFactor() (x expr) { +func (p *parser) parseExpression() expr + +func (p *parser) parseOperand() (x expr) { switch p.tok { case token.STRING: - x = &literal{p.parseValue()}; + x = p.parseLiteral(); case token.SHR: p.next(); - iexpr := p.parseFactor(); - body := p.parseExpression(); + x = &indentation{p.parseOperand(), p.parseExpression()}; p.expect(token.SHL); - return &indentation{iexpr, body}; case token.LPAREN: p.next(); @@ -335,73 +427,104 @@ func (p *parser) parseFactor() (x expr) { p.expect(token.RBRACE); default: - x = p.parseField(); + x = p.parseField(); // may be nil } return x; } -func (p *parser) parseTerm() expr { - x := p.parseFactor(); +func (p *parser) parseSequence() expr { + var list vector.Vector; + list.Init(0); - if x != nil { - for { - y := p.parseFactor(); - if y == nil { - break; - } - x = &sequence{x, y}; - } + for x := p.parseOperand(); x != nil; x = p.parseOperand() { + list.Push(x); } - return x; + // no need for a sequence if list.Len() < 2 + switch list.Len() { + case 0: return nil; + case 1: return list.At(0).(expr); + } + + // convert list into a sequence + seq := make(sequence, list.Len()); + for i := 0; i < list.Len(); i++ { + seq[i] = list.At(i).(expr); + } + return seq; } func (p *parser) parseExpression() expr { - x := p.parseTerm(); + var list vector.Vector; + list.Init(0); - for p.tok == token.OR { + for { + x := p.parseSequence(); + if x != nil { + list.Push(x); + } + if p.tok != token.OR { + break; + } p.next(); - y := p.parseTerm(); - x = &alternative{x, y}; } - return x; + // no need for an alternatives if list.Len() < 2 + switch list.Len() { + case 0: return nil; + case 1: return list.At(0).(expr); + } + + // convert list into 
a alternatives + alt := make(alternatives, list.Len()); + for i := 0; i < list.Len(); i++ { + alt[i] = list.At(i).(expr); + } + return alt; } -func (p *parser) parseProduction() (string, expr) { - var name string; - switch p.tok { - case token.DEFAULT: - p.next(); - name = "default"; - case token.QUO: - p.next(); - name = "/"; - default: - name = p.parseName(); - } - p.expect(token.ASSIGN); - x := p.parseExpression(); - return name, x; -} - - -func (p *parser) parseFormat() *Format { - rules := make(map [string] expr); - +func (p *parser) parseFormat() { for p.tok != token.EOF { pos := p.pos; - name, x := p.parseProduction(); - // add production to rules - if t, found := rules[name]; !found { - rules[name] = x; - } else { - p.Error(pos, "production already declared: " + name); + name, is_ident := p.parseRuleName(); + switch p.tok { + case token.STRING: + // package declaration + import_path, err := strconv.Unquote(string(p.lit)); + if err != nil { + panic("scanner error"); + } + p.next(); + + // add package declaration + if !is_ident { + p.Error(pos, "illegal package name: " + name); + } else if _, found := p.packs[name]; !found { + p.packs[name] = import_path; + } else { + p.Error(pos, "package already declared: " + name); + } + + case token.ASSIGN: + // format rule + p.next(); + x := p.parseExpression(); + + // add rule + if _, found := p.rules[name]; !found { + p.rules[name] = x; + } else { + p.Error(pos, "format rule already declared: " + name); + } + + default: + p.error_expected(p.pos, "package declaration or format rule"); + p.next(); // make progress in any case } if p.tok == token.SEMICOLON { @@ -411,69 +534,104 @@ func (p *parser) parseFormat() *Format { } } p.expect(token.EOF); +} - return &Format{rules}; + +func (p *parser) remap(pos token.Position, name string) string { + i := strings.Index(name, "."); + if i >= 0 { + package_name := name[0 : i]; + type_name := name[i : len(name)]; + // lookup package + if import_path, found := p.packs[package_name]; 
found { + name = import_path + "." + type_name; + } else { + p.Error(pos, "package not declared: " + package_name); + } + } + return name; } // Parse parses a set of format productions from source src. If there are no // errors, the result is a Format and the error is nil. Otherwise the format -// is nil and the os.Error string contains a line for each error encountered. +// is nil and a non-empty ErrorList is returned. // -func Parse(src []byte, fmap FormatterMap) (*Format, os.Error) { +func Parse(src []byte, fmap FormatterMap) (Format, os.Error) { // parse source var p parser; + p.errors.Init(0); p.scanner.Init(src, &p, false); p.next(); - f := p.parseFormat(); + p.packs = make(map [string] string); + p.rules = make(Format); + p.parseFormat(); // add custom formatters, if any - // TODO should we test that name is a legal name? + var invalidPos token.Position; for name, form := range fmap { - if t, found := f.rules[name]; !found { - f.rules[name] = &custom{name, form}; + name = p.remap(invalidPos, name); + if t, found := p.rules[name]; !found { + p.rules[name] = &custom{name, form}; } else { - p.Error(token.Position{0, 0, 0}, "formatter already declared: " + name); + var invalidPos token.Position; + p.Error(invalidPos, "formatter already declared: " + name); } } - if p.first != nil { - return nil, p.first; + // convert errors list, if any + if p.errors.Len() > 0 { + errors := make(ErrorList, p.errors.Len()); + for i := 0; i < p.errors.Len(); i++ { + errors[i] = p.errors.At(i).(*Error); + } + return nil, errors; } - return f, nil; + return p.rules, nil; } // ---------------------------------------------------------------------------- // Formatting +// The current formatting state. 
type state struct { - f *Format; - env interface{}; - sep expr; + f Format; // the format used + env interface{}; // the user-supplied environment, simply passed through + def expr; // the default rule, if any + div expr; // the global divider rule, if any + writediv bool; // true if the divider needs to be written errors chan os.Error; // not chan *Error: errors <- nil would be wrong! - indent io.ByteBuffer; + indent io.ByteBuffer; // the current indentation } -func (ps *state) init(f *Format, env interface{}) { +func (ps *state) init(f Format, env interface{}, errors chan os.Error) { ps.f = f; ps.env = env; - // if we have a separator ("/") production, cache it for easy access - if sep, has_sep := f.rules["/"]; has_sep { - ps.sep = sep; + // if we have a default ("default") rule, cache it for fast access + if def, has_def := f["default"]; has_def { + ps.def = def; } - ps.errors = make(chan os.Error); + // if we have a divider ("/") rule, cache it for fast access + if div, has_div := f["/"]; has_div { + ps.div = div; + } + ps.errors = errors; } func (ps *state) error(msg string) { - ps.errors <- &Error{token.Position{0, 0, 0}, msg, nil}; + ps.errors <- os.NewError(msg); runtime.Goexit(); } +// Get a field value given a field name. Returns the field value and +// the "embedding level" at which it was found. The embedding level +// is 0 for top-level fields in a struct. +// func getField(val reflect.Value, fieldname string) (reflect.Value, int) { // do we have a struct in the first place? 
if val.Kind() != reflect.StructKind { @@ -492,7 +650,7 @@ func getField(val reflect.Value, fieldname string) (reflect.Value, int) { // look for field in anonymous fields var field reflect.Value; - level := 1000; // infinity + level := 1000; // infinity (no struct has that many levels) for i := 0; i < styp.Len(); i++ { name, typ, tag, offset := styp.Field(i); if name == "" { @@ -548,52 +706,35 @@ func typename(value reflect.Value) string { func (ps *state) getFormat(name string) expr { - if fexpr, found := ps.f.rules[name]; found { + if fexpr, found := ps.f[name]; found { return fexpr; } - if fexpr, found := ps.f.rules["default"]; found { - return fexpr; + if ps.def != nil { + return ps.def; } ps.error(fmt.Sprintf("no production for type: '%s'\n", name)); - panic("unreachable"); return nil; } -// Count the number of printf-style '%' formatters in s. -// -func percentCount(s []byte) int { - n := 0; - for i := 0; i < len(s); i++ { - if s[i] == '%' { - i++; - if i >= len(s) || s[i] != '%' { // don't count "%%" - n++; - } - } +func (ps *state) printf(w io.Writer, fexpr expr, value reflect.Value, index int) bool + + +func (ps *state) printDiv(w io.Writer, value reflect.Value) { + if ps.div != nil && ps.writediv { + div := ps.div; + ps.div = nil; + ps.printf(w, div, value, 0); + ps.div = div; } - return n; + ps.writediv = true; } -func (ps *state) rawPrintf(w io.Writer, format []byte, value reflect.Value) { - // TODO find a better way to do this - x := value.Interface(); - switch percentCount(format) { - case 0: w.Write(format); - case 1: fmt.Fprintf(w, string(format), x); - case 2: fmt.Fprintf(w, string(format), x, x); - case 3: fmt.Fprintf(w, string(format), x, x, x); - case 4: fmt.Fprintf(w, string(format), x, x, x, x); - default: panic("no support for more than 4 '%'-format chars yet"); - } -} - - -func (ps *state) printIndented(w io.Writer, s []byte) { - // replace each '\n' with the indent + '\n' +func (ps *state) writeIndented(w io.Writer, s []byte) { + // write 
indent after each '\n' i0 := 0; for i := 0; i < len(s); i++ { if s[i] == '\n' { @@ -606,65 +747,58 @@ func (ps *state) printIndented(w io.Writer, s []byte) { } -func (ps *state) printf(w io.Writer, format []byte, value reflect.Value) { - if ps.indent.Len()== 0 { - // no indentation - ps.rawPrintf(w, format, value); - } else { - // print into temporary buffer - var buf io.ByteBuffer; - ps.rawPrintf(&buf, format, value); - ps.printIndented(w, buf.Data()); - } -} - - +// TODO complete this comment // Returns true if a non-empty field value was found. -func (ps *state) print(w io.Writer, fexpr expr, value reflect.Value, index int) bool { +func (ps *state) printf(w io.Writer, fexpr expr, value reflect.Value, index int) bool { if fexpr == nil { return true; } switch t := fexpr.(type) { - case *alternative: - // - print the contents of the first alternative with a non-empty field - // - result is true if there is at least one non-empty field - var buf io.ByteBuffer; - if ps.print(&buf, t.x, value, 0) { - w.Write(buf.Data()); - return true; - } else { + case alternatives: + // - write first non-empty alternative + // - result is not empty iff there is an non-empty alternative + for _, x := range t { var buf io.ByteBuffer; - if ps.print(&buf, t.y, value, 0) { + if ps.printf(&buf, x, value, 0) { w.Write(buf.Data()); return true; } } return false; - case *sequence: - // - print the contents of the sequence - // - result is true if there is no empty field - // TODO do we need to buffer here? why not? - b := ps.print(w, t.x, value, index); - // TODO should invoke separator only inbetween terminal symbols? 
- if ps.sep != nil { - b = ps.print(w, ps.sep, value, index) && b; + case sequence: + // - write every element of the sequence + // - result is not empty iff no element was empty + b := true; + for _, x := range t { + b = ps.printf(w, x, value, index) && b; } - return ps.print(w, t.y, value, index) && b; + return b; - case *literal: - // - print the literal - // - result is always true (literal is never empty) - ps.printf(w, t.value, value); + case []byte: + // write literal, may start with "\n" + ps.printDiv(w, value); + if len(t) > 0 && t[0] == '\n' && ps.indent.Len() > 0 { + // newline must be followed by indentation + w.Write([]byte{'\n'}); + w.Write(ps.indent.Data()); + t = t[1 : len(t)]; + } + w.Write(t); + return true; + + case string: + // write format literal with value, starts with "%" (but not "%%") + ps.printDiv(w, value); + fmt.Fprintf(w, t, value.Interface()); return true; case *field: - // - print the contents of the field + // - write the contents of the field // - format is either the field format or the type-specific format - // - TODO look at field tag for default format - // - result is true if the field is not empty - switch t.fname { + // - result is not empty iff the field is not empty + switch t.field_name { case "^": // identity - value doesn't change @@ -693,49 +827,52 @@ func (ps *state) print(w io.Writer, fexpr expr, value reflect.Value, index int) value = v.Value(); default: - // TODO fix this ps.error(fmt.Sprintf("error: * does not apply to `%s`\n", value.Type().Name())); } default: // field - field, _ := getField(value, t.fname); + field, _ := getField(value, t.field_name); if field == nil { - ps.error(fmt.Sprintf("error: no field `%s` in `%s`\n", t.fname, value.Type().Name())); + ps.error(fmt.Sprintf("error: no field `%s` in `%s`\n", t.field_name, value.Type().Name())); } value = field; } - // determine format - tname := t.tname; - if tname == "" { - tname = typename(value) + // field-specific rule name + rule_name := t.rule_name; + 
if rule_name == "" { + rule_name = typename(value) } - fexpr = ps.getFormat(tname); + fexpr = ps.getFormat(rule_name); - return ps.print(w, fexpr, value, index); + return ps.printf(w, fexpr, value, index); case *indentation: + // - write the body within the given indentation + // - the result is not empty iff the body is not empty saved_len := ps.indent.Len(); - ps.print(&ps.indent, t.indent, value, index); // add additional indentation - b := ps.print(w, t.body, value, index); + ps.printf(&ps.indent, t.indent, value, index); // add additional indentation + b := ps.printf(w, t.body, value, index); ps.indent.Truncate(saved_len); // reset indentation return b; case *option: - // print the contents of the option if it contains a non-empty field + // - write body if it is not empty + // - the result is always not empty var buf io.ByteBuffer; - if ps.print(&buf, t.body, value, 0) { + if ps.printf(&buf, t.body, value, 0) { w.Write(buf.Data()); } return true; case *repetition: - // print the contents of the repetition while there is a non-empty field + // - write body until as long as it is not empty + // - the result is always not empty var buf io.ByteBuffer; - for i := 0; ps.print(&buf, t.body, value, i); i++ { + for i := 0; ps.printf(&buf, t.body, value, i); i++ { if i > 0 { - ps.print(w, t.div, value, i); + ps.printf(w, t.div, value, i); } w.Write(buf.Data()); buf.Reset(); @@ -743,9 +880,10 @@ func (ps *state) print(w io.Writer, fexpr expr, value reflect.Value, index int) return true; case *custom: + // - invoke custom formatter var buf io.ByteBuffer; - if t.form(&buf, ps.env, value.Interface(), t.name) { - ps.printIndented(w, buf.Data()); + if t.form(&buf, ps.env, value.Interface(), t.rule_name) { + ps.writeIndented(w, buf.Data()); return true; } return false; @@ -756,41 +894,74 @@ func (ps *state) print(w io.Writer, fexpr expr, value reflect.Value, index int) } -// Fprint formats each argument according to the format f -// and writes to w. 
+// Sandbox to wrap a writer. +// Counts total number of bytes written and handles write errors. // -func (f *Format) Fprint(w io.Writer, env interface{}, args ...) (int, os.Error) { +type sandbox struct { + writer io.Writer; + written int; + errors chan os.Error; +} + + +// Write data to the sandboxed writer. If an error occurs, Write +// doesn't return. Instead it reports the error to the errors +// channel and exits the current goroutine. +// +func (s *sandbox) Write(data []byte) (int, os.Error) { + n, err := s.writer.Write(data); + s.written += n; + if err != nil { + s.errors <- err; + runtime.Goexit(); + } + return n, nil; +} + + +// Fprint formats each argument according to the format f +// and writes to w. The result is the total number of bytes +// written and an os.Error, if any. +// +func (f Format) Fprint(w io.Writer, env interface{}, args ...) (int, os.Error) { + errors := make(chan os.Error); + sw := sandbox{w, 0, errors}; + var ps state; - ps.init(f, env); + ps.init(f, env, errors); go func() { value := reflect.NewValue(args).(reflect.StructValue); for i := 0; i < value.Len(); i++ { fld := value.Field(i); - ps.print(w, ps.getFormat(typename(fld)), fld, 0); + ps.printf(&sw, ps.getFormat(typename(fld)), fld, 0); } - ps.errors <- nil; // no errors + errors <- nil; // no errors }(); - // TODO return correct value for count instead of 0 - return 0, <-ps.errors; + return sw.written, <-errors; } // Print formats each argument according to the format f -// and writes to standard output. +// and writes to standard output. The result is the total +// number of bytes written and an os.Error, if any. // -func (f *Format) Print(args ...) (int, os.Error) { +func (f Format) Print(args ...) (int, os.Error) { return f.Fprint(os.Stdout, nil, args); } // Sprint formats each argument according to the format f -// and returns the resulting string. +// and returns the resulting string. 
If an error occurs +// during formatting, the result contains the respective +// error message at the end. // -func (f *Format) Sprint(args ...) string { +func (f Format) Sprint(args ...) string { var buf io.ByteBuffer; - // TODO what to do in case of errors? - f.Fprint(&buf, nil, args); + n, err := f.Fprint(&buf, nil, args); + if err != nil { + fmt.Fprintf(&buf, "--- Sprint(%v) failed: %v", args, err); + } return string(buf.Data()); } diff --git a/usr/gri/pretty/format_test.go b/usr/gri/pretty/format_test.go index c23ad48ed4a..2add36f09b8 100644 --- a/usr/gri/pretty/format_test.go +++ b/usr/gri/pretty/format_test.go @@ -14,7 +14,7 @@ import ( func check(t *testing.T, form, expected string, args ...) { f, err := format.Parse(io.StringBytes(form), nil); if err != nil { - panic(err.String()); + panic(form + ": " + err.String()); } result := f.Sprint(args); if result != expected { @@ -51,6 +51,7 @@ type T1 struct { } const F1 = + `format "format";` `int = "%d";` `format.T1 = "<" a ">";` @@ -95,11 +96,13 @@ type T3 struct { } const F3a = + `format "format";` `default = "%v";` `array = *;` `format.T3 = s {" " a a / ","};` const F3b = + `format "format";` `int = "%d";` `string = "%s";` `array = *;` @@ -124,6 +127,7 @@ type T4 struct { } const F4a = + `format "format";` `int = "%d";` `pointer = *;` `array = *;` @@ -132,6 +136,7 @@ const F4a = `format.T4 = "<" (x:empty x | "-") ">" ` const F4b = + `format "format";` `int = "%d";` `pointer = *;` `array = *;` diff --git a/usr/gri/pretty/pretty.go b/usr/gri/pretty/pretty.go index 56b36e45a38..dfad3005107 100644 --- a/usr/gri/pretty/pretty.go +++ b/usr/gri/pretty/pretty.go @@ -120,7 +120,7 @@ func main() { } ast_format, err := format.Parse(src, fmap); if err != nil { - fmt.Fprintf(os.Stderr, "%s: format errors:\n%s", ast_txt, err); + fmt.Fprintf(os.Stderr, "%s:%v\n", ast_txt, err); os.Exit(1); }