From f5f23e075e179ddbb518683e0762f27cd59018dd Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Wed, 13 Jun 2012 13:32:29 -0700 Subject: [PATCH] go/ast: comment map implementation A comment map associates comments with AST nodes and permits correct updating of the AST's comment list when the AST is manipulated. R=rsc CC=golang-dev https://golang.org/cl/6281044 --- src/pkg/go/ast/commentmap.go | 268 ++++++++++++++++++++++++++++++ src/pkg/go/ast/commentmap_test.go | 143 ++++++++++++++++ 2 files changed, 411 insertions(+) create mode 100644 src/pkg/go/ast/commentmap.go create mode 100644 src/pkg/go/ast/commentmap_test.go diff --git a/src/pkg/go/ast/commentmap.go b/src/pkg/go/ast/commentmap.go new file mode 100644 index 00000000000..a732f91954f --- /dev/null +++ b/src/pkg/go/ast/commentmap.go @@ -0,0 +1,268 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ast + +import ( + "go/token" + "sort" +) + +type byPos []*CommentGroup + +func (a byPos) Len() int { return len(a) } +func (a byPos) Less(i, j int) bool { return a[i].Pos() < a[j].Pos() } +func (a byPos) Swap(i, j int) { a[i], a[j] = a[j], a[i] } + +// sortComments sorts the list of comment groups in source order. +// +func sortComments(list []*CommentGroup) { + // TODO(gri): Does it make sense to check for sorted-ness + // first (because we know that sorted-ness is + // very likely)? + if orderedList := byPos(list); !sort.IsSorted(orderedList) { + sort.Sort(orderedList) + } +} + +// A CommentMap maps an AST node to a list of comment groups +// associated with it. See NewCommentMap for a description of +// the association. +// +type CommentMap map[Node][]*CommentGroup + +func (cmap CommentMap) addComment(n Node, c *CommentGroup) { + list := cmap[n] + if len(list) == 0 { + list = []*CommentGroup{c} + } else { + list = append(list, c) + } + cmap[n] = list +} + +type byInterval []Node + +func (a byInterval) Len() int { return len(a) } +func (a byInterval) Less(i, j int) bool { + pi, pj := a[i].Pos(), a[j].Pos() + return pi < pj || pi == pj && a[i].End() > a[j].End() +} +func (a byInterval) Swap(i, j int) { a[i], a[j] = a[j], a[i] } + +// nodeList returns the list of nodes of the AST n in source order. +// +func nodeList(n Node) []Node { + var list []Node + Inspect(n, func(n Node) bool { + // don't collect comments + switch n.(type) { + case nil, *CommentGroup, *Comment: + return false + } + list = append(list, n) + return true + }) + // Note: The current implementation assumes that Inspect traverses the + // AST in depth-first and thus _source_ order. If AST traversal + // does not follow source order, the sorting call below will be + // required. + // sort.Sort(byInterval(list)) + return list +} + +// A commentListReader helps iterating through a list of comment groups. +// +type commentListReader struct { + fset *token.FileSet + list []*CommentGroup + index int + comment *CommentGroup // comment group at current index + pos, end token.Position // source interval of comment group at current index +} + +func (r *commentListReader) eol() bool { + return r.index >= len(r.list) +} + +func (r *commentListReader) next() { + if !r.eol() { + r.comment = r.list[r.index] + r.pos = r.fset.Position(r.comment.Pos()) + r.end = r.fset.Position(r.comment.End()) + r.index++ + } +} + +// A nodeStack keeps track of nested nodes. +// A node lower on the stack lexically contains the nodes higher on the stack. +// +type nodeStack []Node + +// push pops all nodes that appear lexically before n +// and then pushes n on the stack. +// +func (s *nodeStack) push(n Node) { + s.pop(n.Pos()) + *s = append((*s), n) +} + +// pop pops all nodes that appear lexically before pos +// (i.e., whose lexical extent has ended before or at pos). +// It returns the last node popped. +// +func (s *nodeStack) pop(pos token.Pos) (top Node) { + i := len(*s) + for i > 0 && (*s)[i-1].End() <= pos { + top = (*s)[i-1] + i-- + } + *s = (*s)[0:i] + return top +} + +// NewCommentMap creates a new comment map by associating comment groups +// to nodes. The nodes are the nodes of the given AST f and the comments +// are taken from f.Comments. +// +// A comment group g is associated with a node n if: +// +// - g starts on the same line as n ends +// - g starts on the line immediately following n, and there is +// at least one empty line after g and before the next node +// - g starts before n and is not associated to the node before n +// via the previous rules +// +// NewCommentMap tries to associate a comment group to the "largest" +// node possible: For instance, if the comment is a line comment +// trailing an assignment, the comment is associated with the entire +// assignment rather than just the last operand in the assignment. +// +func NewCommentMap(fset *token.FileSet, f *File) CommentMap { + if len(f.Comments) == 0 { + return nil // no comments to map + } + + cmap := make(CommentMap) + + // set up comment reader r + comments := make([]*CommentGroup, len(f.Comments)) + copy(comments, f.Comments) // don't change f.Comments + sortComments(comments) + r := commentListReader{fset: fset, list: comments} // !r.eol() because len(comments) > 0 + r.next() + + // create node list in lexical order + nodes := nodeList(f) + nodes = append(nodes, nil) // append sentinel + + // set up iteration variables + var ( + p Node // previous node + pend token.Position // end of p + pg Node // previous node group (enclosing nodes of "importance") + pgend token.Position // end of pg + stack nodeStack // stack of node groups + ) + + for _, q := range nodes { + var qpos token.Position + if q != nil { + qpos = fset.Position(q.Pos()) // current node position + } else { + // set fake sentinel position to infinity so that + // all comments get processed before the sentinel + const infinity = 1 << 30 + qpos.Offset = infinity + qpos.Line = infinity + } + + // process comments before current node + for r.end.Offset <= qpos.Offset { + // determine recent node group + if top := stack.pop(r.comment.Pos()); top != nil { + pg = top + pgend = fset.Position(pg.End()) + } + // Try to associate a comment first with a node group + // (i.e., a node of "importance" such as a declaration); + // if that fails, try to associate it with the most recent + // node. + // TODO(gri) try to simplify the logic below + var assoc Node + switch { + case pg != nil && + (pgend.Line == r.pos.Line || + pgend.Line+1 == r.pos.Line && r.end.Line+1 < qpos.Line): + // 1) comment starts on same line as previous node group ends, or + // 2) comment starts on the line immediately after the + // previous node group and there is an empty line before + // the current node + // => associate comment with previous node group + assoc = pg + case p != nil && + (pend.Line == r.pos.Line || + pend.Line+1 == r.pos.Line && r.end.Line+1 < qpos.Line || + q == nil): + // same rules apply as above for p rather than pg, + // but also associate with p if we are at the end (q == nil) + assoc = p + default: + // otherwise, associate comment with current node + if q == nil { + // we can only reach here if there was no p + // which would imply that there were no nodes + panic("internal error: no comments should be associated with sentinel") + } + assoc = q + } + cmap.addComment(assoc, r.comment) + if r.eol() { + return cmap + } + r.next() + } + + // update previous node + p = q + pend = fset.Position(p.End()) + + // update previous node group if we see an "important" node + switch q.(type) { + case *File, *Field, Decl, Spec, Stmt: + stack.push(q) + } + } + + return cmap +} + +// Filter returns a new comment map consisting of only those +// entries of cmap for which a corresponding node exists in +// any of the node trees provided. +// +func (cmap CommentMap) Filter(nodes ...Node) CommentMap { + umap := make(CommentMap) + for _, n := range nodes { + Inspect(n, func(n Node) bool { + if g := cmap[n]; len(g) > 0 { + umap[n] = g + } + return true + }) + } + return umap +} + +// Comments returns the list of comment groups in the comment map. +// The result is sorted is source order. +// +func (cmap CommentMap) Comments() []*CommentGroup { + list := make([]*CommentGroup, 0, len(cmap)) + for _, e := range cmap { + list = append(list, e...) + } + sortComments(list) + return list +} diff --git a/src/pkg/go/ast/commentmap_test.go b/src/pkg/go/ast/commentmap_test.go new file mode 100644 index 00000000000..c622a4175f1 --- /dev/null +++ b/src/pkg/go/ast/commentmap_test.go @@ -0,0 +1,143 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// To avoid a cyclic dependency with go/parser, this file is in a separate package. + +package ast_test + +import ( + "bytes" + "fmt" + . "go/ast" + "go/parser" + "go/token" + "sort" + "testing" +) + +const src = ` +// the very first comment + +// package p +package p /* the name is p */ + +// imports +import ( + "bytes" // bytes + "fmt" // fmt + "go/ast" + "go/parser" +) + +// T +type T struct { + a, b, c int // associated with a, b, c + // associated with x, y + x, y float64 // float values + z complex128 // complex value +} +// also associated with T + +// x +var x = 0 // x = 0 +// also associated with x + +// f1 +func f1() { + /* associated with s1 */ + s1() + // also associated with s1 + + // associated with s2 + + // also associated with s2 + s2() // line comment for s2 +} +// associated with f1 +// also associated with f1 + +// associated with f2 + +// f2 +func f2() { +} + +func f3() { + i := 1 /* 1 */ + 2 // addition + _ = i +} + +// the very last comment +` + +// res maps a key of the form "line number: node type" +// to the associated comments' text. +// +var res = map[string]string{ + " 5: *ast.File": "the very first comment\npackage p\n", + " 5: *ast.Ident": " the name is p\n", + " 8: *ast.GenDecl": "imports\n", + " 9: *ast.ImportSpec": "bytes\n", + "10: *ast.ImportSpec": "fmt\n", + "16: *ast.GenDecl": "T\nalso associated with T\n", + "17: *ast.Field": "associated with a, b, c\n", + "19: *ast.Field": "associated with x, y\nfloat values\n", + "20: *ast.Field": "complex value\n", + "25: *ast.GenDecl": "x\nx = 0\nalso associated with x\n", + "29: *ast.FuncDecl": "f1\nassociated with f1\nalso associated with f1\n", + "31: *ast.ExprStmt": " associated with s1\nalso associated with s1\n", + "37: *ast.ExprStmt": "associated with s2\nalso associated with s2\nline comment for s2\n", + "45: *ast.FuncDecl": "associated with f2\nf2\n", + "49: *ast.AssignStmt": "addition\n", + "49: *ast.BasicLit": " 1\n", + "50: *ast.Ident": "the very last comment\n", +} + +func ctext(list []*CommentGroup) string { + var buf bytes.Buffer + for _, g := range list { + buf.WriteString(g.Text()) + } + return buf.String() +} + +func TestCommentMap(t *testing.T) { + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, "", src, parser.ParseComments) + if err != nil { + t.Fatal(err) + } + cmap := NewCommentMap(fset, f) + + // very correct association of comments + for n, list := range cmap { + key := fmt.Sprintf("%2d: %T", fset.Position(n.Pos()).Line, n) + got := ctext(list) + want := res[key] + if got != want { + t.Errorf("%s: got %q; want %q", key, got, want) + } + } + + // verify that no comments got lost + if n := len(cmap.Comments()); n != len(f.Comments) { + t.Errorf("got %d comment groups in map; want %d", n, len(f.Comments)) + } + + // support code to update test: + // set genMap to true to generate res map + const genMap = false + if genMap { + out := make([]string, 0, len(cmap)) + for n, list := range cmap { + out = append(out, fmt.Sprintf("\t\"%2d: %T\":\t%q,", fset.Position(n.Pos()).Line, n, ctext(list))) + } + sort.Strings(out) + for _, s := range out { + fmt.Println(s) + } + } +} + +// TODO(gri): add tests for Filter.