1
0
mirror of https://github.com/golang/go synced 2024-10-01 07:38:32 -06:00
go/dashboard/watcher/watcher.go
Brad Fitzpatrick a54d006617 dashboard/watcher: use Gerrit's JSON meta URL to poll smarter
Reduces our HTTP requests to Gerrit by a factor of the number of
subrepos we have.

Change-Id: I3f8fabeb70fdb5c276c639924baebcf5510fda9b
Reviewed-on: https://go-review.googlesource.com/1568
Reviewed-by: Andrew Gerrand <adg@golang.org>
Reviewed-by: Chris Manghane <cmang@golang.org>
2014-12-15 23:43:50 +00:00

776 lines
19 KiB
Go

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Command watcher watches the specified repository for new commits
// and reports them to the build dashboard.
package main // import "golang.org/x/tools/dashboard/watcher"
import (
"bufio"
"bytes"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"os/exec"
"path"
"path/filepath"
"runtime"
"sort"
"strings"
"sync"
"time"
)
const (
goBase = "https://go.googlesource.com/"
watcherVersion = 3 // must match dashboard/app/build/handler.go's watcherVersion
origin = "origin/"
master = origin + "master" // name of the master branch
metaURL = goBase + "?b=master&format=JSON"
)
var (
repoURL = flag.String("repo", goBase+"go", "Repository URL")
dashboard = flag.String("dash", "https://build.golang.org/", "Dashboard URL (must end in /)")
keyFile = flag.String("key", defaultKeyFile, "Build dashboard key file")
pollInterval = flag.Duration("poll", 10*time.Second, "Remote repo poll interval")
network = flag.Bool("network", true, "Enable network calls (disable for testing)")
)
var (
defaultKeyFile = filepath.Join(homeDir(), ".gobuildkey")
dashboardKey = ""
networkSeen = make(map[string]bool) // track known hashes for testing
)
func main() {
flag.Parse()
go pollGerritAndTickle()
err := run()
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
// run is a little wrapper so we can use defer and return to signal
// errors. It should only return a non-nil error.
func run() error {
if !strings.HasSuffix(*dashboard, "/") {
return errors.New("dashboard URL (-dashboard) must end in /")
}
if k, err := readKey(); err != nil {
return err
} else {
dashboardKey = k
}
dir, err := ioutil.TempDir("", "watcher")
if err != nil {
return err
}
defer os.RemoveAll(dir)
errc := make(chan error)
go func() {
r, err := NewRepo(dir, *repoURL, "")
if err != nil {
errc <- err
return
}
errc <- r.Watch()
}()
subrepos, err := subrepoList()
if err != nil {
return err
}
for _, path := range subrepos {
go func(path string) {
url := goBase + strings.TrimPrefix(path, "golang.org/x/")
r, err := NewRepo(dir, url, path)
if err != nil {
errc <- err
return
}
errc <- r.Watch()
}(path)
}
// Must be non-nil.
return <-errc
}
// Repo represents a repository to be watched.
type Repo struct {
root string // on-disk location of the git repo
path string // base import path for repo (blank for main repo)
commits map[string]*Commit // keyed by full commit hash (40 lowercase hex digits)
branches map[string]*Branch // keyed by branch name, eg "release-branch.go1.3" (or empty for default)
}
// NewRepo checks out a new instance of the Mercurial repository
// specified by url to a new directory inside dir.
// The path argument is the base import path of the repository,
// and should be empty for the main Go repo.
func NewRepo(dir, url, path string) (*Repo, error) {
r := &Repo{
path: path,
root: filepath.Join(dir, filepath.Base(path)),
commits: make(map[string]*Commit),
branches: make(map[string]*Branch),
}
r.logf("cloning %v", url)
cmd := exec.Command("git", "clone", url, r.root)
if out, err := cmd.CombinedOutput(); err != nil {
return nil, fmt.Errorf("%v\n\n%s", err, out)
}
r.logf("loading commit log")
if err := r.update(false); err != nil {
return nil, err
}
r.logf("found %v branches among %v commits\n", len(r.branches), len(r.commits))
return r, nil
}
// Watch continuously runs "git fetch" in the repo, checks for
// new commits, and posts any new commits to the dashboard.
// It only returns a non-nil error.
func (r *Repo) Watch() error {
tickler := repoTickler(r.name())
for {
if err := r.fetch(); err != nil {
return err
}
if err := r.update(true); err != nil {
return err
}
remotes, err := r.remotes()
if err != nil {
return err
}
for _, name := range remotes {
b, ok := r.branches[name]
if !ok {
// skip branch; must be already merged
continue
}
if err := r.postNewCommits(b); err != nil {
return err
}
}
// We still run a timer but a very slow one, just
// in case the mechanism updating the repo tickler
// breaks for some reason.
timer := time.NewTimer(5 * time.Minute)
select {
case <-tickler:
timer.Stop()
case <-timer.C:
}
}
}
func (r *Repo) name() string {
if r.path == "" {
return "go"
}
return path.Base(r.path)
}
func (r *Repo) logf(format string, args ...interface{}) {
log.Printf(r.name()+": "+format, args...)
}
// postNewCommits looks for unseen commits on the specified branch and
// posts them to the dashboard.
func (r *Repo) postNewCommits(b *Branch) error {
if b.Head == b.LastSeen {
return nil
}
c := b.LastSeen
if c == nil {
// Haven't seen anything on this branch yet:
if b.Name == master {
// For the master branch, bootstrap by creating a dummy
// commit with a lone child that is the initial commit.
c = &Commit{}
for _, c2 := range r.commits {
if c2.Parent == "" {
c.children = []*Commit{c2}
break
}
}
if c.children == nil {
return fmt.Errorf("couldn't find initial commit")
}
} else {
// Find the commit that this branch forked from.
base, err := r.mergeBase(b.Name, master)
if err != nil {
return err
}
var ok bool
c, ok = r.commits[base]
if !ok {
return fmt.Errorf("couldn't find base commit: %v", base)
}
}
}
if err := r.postChildren(b, c); err != nil {
return err
}
b.LastSeen = b.Head
return nil
}
// postChildren posts to the dashboard all descendants of the given parent.
// It ignores descendants that are not on the given branch.
func (r *Repo) postChildren(b *Branch, parent *Commit) error {
for _, c := range parent.children {
if c.Branch != b.Name {
continue
}
if err := r.postCommit(c); err != nil {
return err
}
}
for _, c := range parent.children {
if err := r.postChildren(b, c); err != nil {
return err
}
}
return nil
}
// postCommit sends a commit to the build dashboard.
func (r *Repo) postCommit(c *Commit) error {
r.logf("sending commit to dashboard: %v", c)
t, err := time.Parse("Mon, 2 Jan 2006 15:04:05 -0700", c.Date)
if err != nil {
return fmt.Errorf("postCommit: parsing date %q for commit %v: %v", c.Date, c, err)
}
dc := struct {
PackagePath string // (empty for main repo commits)
Hash string
ParentHash string
User string
Desc string
Time time.Time
Branch string
NeedsBenchmarking bool
}{
PackagePath: r.path,
Hash: c.Hash,
ParentHash: c.Parent,
User: c.Author,
Desc: c.Desc,
Time: t,
Branch: strings.TrimPrefix(c.Branch, origin),
NeedsBenchmarking: c.NeedsBenchmarking(),
}
b, err := json.Marshal(dc)
if err != nil {
return fmt.Errorf("postCommit: marshaling request body: %v", err)
}
if !*network {
if c.Parent != "" {
if !networkSeen[c.Parent] {
r.logf("%v: %v", c.Parent, r.commits[c.Parent])
return fmt.Errorf("postCommit: no parent %v found on dashboard for %v", c.Parent, c)
}
}
if networkSeen[c.Hash] {
return fmt.Errorf("postCommit: already seen %v", c)
}
networkSeen[c.Hash] = true
return nil
}
u := fmt.Sprintf("%vcommit?version=%v&key=%v", *dashboard, watcherVersion, dashboardKey)
resp, err := http.Post(u, "text/json", bytes.NewReader(b))
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
return fmt.Errorf("postCommit: status: %v", resp.Status)
}
var s struct {
Error string
}
err = json.NewDecoder(resp.Body).Decode(&s)
if err != nil {
return fmt.Errorf("postCommit: decoding response: %v", err)
}
if s.Error != "" {
return fmt.Errorf("postCommit: error: %v", s.Error)
}
return nil
}
// update looks for new commits and branches,
// and updates the commits and branches maps.
func (r *Repo) update(noisy bool) error {
remotes, err := r.remotes()
if err != nil {
return err
}
for _, name := range remotes {
b := r.branches[name]
// Find all unseen commits on this branch.
revspec := name
if b != nil {
// If we know about this branch,
// only log commits down to the known head.
revspec = b.Head.Hash + ".." + name
} else if revspec != master {
// If this is an unknown non-master branch,
// log up to where it forked from master.
base, err := r.mergeBase(name, master)
if err != nil {
return err
}
revspec = base + ".." + name
}
log, err := r.log("--topo-order", revspec)
if err != nil {
return err
}
if len(log) == 0 {
// No commits to handle; carry on.
continue
}
// Add unknown commits to r.commits.
var added []*Commit
for _, c := range log {
// Sanity check: we shouldn't see the same commit twice.
if _, ok := r.commits[c.Hash]; ok {
return fmt.Errorf("found commit we already knew about: %v", c.Hash)
}
if noisy {
r.logf("found new commit %v", c)
}
c.Branch = name
r.commits[c.Hash] = c
added = append(added, c)
}
// Link added commits.
for _, c := range added {
if c.Parent == "" {
// This is the initial commit; no parent.
r.logf("no parents for initial commit %v", c)
continue
}
// Find parent commit.
p, ok := r.commits[c.Parent]
if !ok {
return fmt.Errorf("can't find parent %q for %v", c.Parent, c)
}
// Link parent Commit.
c.parent = p
// Link child Commits.
p.children = append(p.children, c)
}
// Update branch head, or add newly discovered branch.
head := log[0]
if b != nil {
// Known branch; update head.
b.Head = head
r.logf("updated branch head: %v", b)
} else {
// It's a new branch; add it.
seen, err := r.lastSeen(head.Hash)
if err != nil {
return err
}
b = &Branch{Name: name, Head: head, LastSeen: seen}
r.branches[name] = b
r.logf("found branch: %v", b)
}
}
return nil
}
// lastSeen finds the most recent commit the dashboard has seen,
// starting at the specified head. If the dashboard hasn't seen
// any of the commits from head to the beginning, it returns nil.
func (r *Repo) lastSeen(head string) (*Commit, error) {
h, ok := r.commits[head]
if !ok {
return nil, fmt.Errorf("lastSeen: can't find %q in commits", head)
}
var s []*Commit
for c := h; c != nil; c = c.parent {
s = append(s, c)
}
var err error
i := sort.Search(len(s), func(i int) bool {
if err != nil {
return false
}
ok, err = r.dashSeen(s[i].Hash)
return ok
})
switch {
case err != nil:
return nil, fmt.Errorf("lastSeen: %v", err)
case i < len(s):
return s[i], nil
default:
// Dashboard saw no commits.
return nil, nil
}
}
// dashSeen reports whether the build dashboard knows the specified commit.
func (r *Repo) dashSeen(hash string) (bool, error) {
if !*network {
return networkSeen[hash], nil
}
v := url.Values{"hash": {hash}, "packagePath": {r.path}}
u := *dashboard + "commit?" + v.Encode()
resp, err := http.Get(u)
if err != nil {
return false, err
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
return false, fmt.Errorf("status: %v", resp.Status)
}
var s struct {
Error string
}
err = json.NewDecoder(resp.Body).Decode(&s)
if err != nil {
return false, err
}
switch s.Error {
case "":
// Found one.
return true, nil
case "Commit not found":
// Commit not found, keep looking for earlier commits.
return false, nil
default:
return false, fmt.Errorf("dashboard: %v", s.Error)
}
}
// mergeBase returns the hash of the merge base for revspecs a and b.
func (r *Repo) mergeBase(a, b string) (string, error) {
cmd := exec.Command("git", "merge-base", a, b)
cmd.Dir = r.root
out, err := cmd.CombinedOutput()
if err != nil {
return "", fmt.Errorf("git merge-base: %v", err)
}
return string(bytes.TrimSpace(out)), nil
}
// remotes returns a slice of remote branches known to the git repo.
// It always puts "origin/master" first.
func (r *Repo) remotes() ([]string, error) {
cmd := exec.Command("git", "branch", "-r")
cmd.Dir = r.root
out, err := cmd.CombinedOutput()
if err != nil {
return nil, fmt.Errorf("git branch: %v", err)
}
bs := []string{master}
for _, b := range strings.Split(string(out), "\n") {
b = strings.TrimSpace(b)
// Ignore aliases, blank lines, and master (it's already in bs).
if b == "" || strings.Contains(b, "->") || b == master {
continue
}
// Ignore pre-go1 release branches; they are just noise.
if strings.HasPrefix(b, origin+"release-branch.r") {
continue
}
bs = append(bs, b)
}
return bs, nil
}
const logFormat = `--format=format:%H
%P
%an <%ae>
%cD
%B
` + logBoundary
const logBoundary = `_-_- magic boundary -_-_`
// log runs "git log" with the supplied arguments
// and parses the output into Commit values.
func (r *Repo) log(dir string, args ...string) ([]*Commit, error) {
args = append([]string{"log", "--date=rfc", logFormat}, args...)
cmd := exec.Command("git", args...)
cmd.Dir = r.root
out, err := cmd.CombinedOutput()
if err != nil {
return nil, fmt.Errorf("git log %v: %v", strings.Join(args, " "), err)
}
// We have a commit with description that contains 0x1b byte.
// Mercurial does not escape it, but xml.Unmarshal does not accept it.
// TODO(adg): do we still need to scrub this? Probably.
out = bytes.Replace(out, []byte{0x1b}, []byte{'?'}, -1)
var cs []*Commit
for _, text := range strings.Split(string(out), logBoundary) {
text = strings.TrimSpace(text)
if text == "" {
continue
}
p := strings.SplitN(text, "\n", 5)
if len(p) != 5 {
return nil, fmt.Errorf("git log %v: malformed commit: %q", strings.Join(args, " "), text)
}
cs = append(cs, &Commit{
Hash: p[0],
// TODO(adg): This may break with branch merges.
Parent: strings.Split(p[1], " ")[0],
Author: p[2],
Date: p[3],
Desc: strings.TrimSpace(p[4]),
// TODO(adg): populate Files
})
}
return cs, nil
}
// fetch runs "git fetch" in the repository root.
// It tries three times, just in case it failed because of a transient error.
func (r *Repo) fetch() error {
var err error
for tries := 0; tries < 3; tries++ {
time.Sleep(time.Duration(tries) * 5 * time.Second) // Linear back-off.
cmd := exec.Command("git", "fetch", "--all")
cmd.Dir = r.root
if out, e := cmd.CombinedOutput(); err != nil {
e = fmt.Errorf("%v\n\n%s", e, out)
log.Printf("git fetch error %v: %v", r.root, e)
if err == nil {
err = e
}
continue
}
return nil
}
return err
}
// Branch represents a Mercurial branch.
type Branch struct {
Name string
Head *Commit
LastSeen *Commit // the last commit posted to the dashboard
}
func (b *Branch) String() string {
return fmt.Sprintf("%q(Head: %v LastSeen: %v)", b.Name, b.Head, b.LastSeen)
}
// Commit represents a single Git commit.
type Commit struct {
Hash string
Author string
Date string // Format: "Mon, 2 Jan 2006 15:04:05 -0700"
Desc string // Plain text, first line is a short description.
Parent string
Branch string
Files string
// For walking the graph.
parent *Commit
children []*Commit
}
func (c *Commit) String() string {
s := c.Hash
if c.Branch != "" {
s += fmt.Sprintf("[%v]", strings.TrimPrefix(c.Branch, origin))
}
s += fmt.Sprintf("(%q)", strings.SplitN(c.Desc, "\n", 2)[0])
return s
}
// NeedsBenchmarking reports whether the Commit needs benchmarking.
func (c *Commit) NeedsBenchmarking() bool {
// Do not benchmark branch commits, they are usually not interesting
// and fall out of the trunk succession.
if c.Branch != master {
return false
}
// Do not benchmark commits that do not touch source files (e.g. CONTRIBUTORS).
for _, f := range strings.Split(c.Files, " ") {
if (strings.HasPrefix(f, "include") || strings.HasPrefix(f, "src")) &&
!strings.HasSuffix(f, "_test.go") && !strings.Contains(f, "testdata") {
return true
}
}
return false
}
func homeDir() string {
switch runtime.GOOS {
case "plan9":
return os.Getenv("home")
case "windows":
return os.Getenv("HOMEDRIVE") + os.Getenv("HOMEPATH")
}
return os.Getenv("HOME")
}
func readKey() (string, error) {
c, err := ioutil.ReadFile(*keyFile)
if err != nil {
return "", err
}
return string(bytes.TrimSpace(bytes.SplitN(c, []byte("\n"), 2)[0])), nil
}
// subrepoList fetches a list of sub-repositories from the dashboard
// and returns them as a slice of base import paths.
// Eg, []string{"golang.org/x/tools", "golang.org/x/net"}.
func subrepoList() ([]string, error) {
if !*network {
return nil, nil
}
r, err := http.Get(*dashboard + "packages?kind=subrepo")
if err != nil {
return nil, fmt.Errorf("subrepo list: %v", err)
}
defer r.Body.Close()
if r.StatusCode != 200 {
return nil, fmt.Errorf("subrepo list: got status %v", r.Status)
}
var resp struct {
Response []struct {
Path string
}
Error string
}
err = json.NewDecoder(r.Body).Decode(&resp)
if err != nil {
return nil, fmt.Errorf("subrepo list: %v", err)
}
if resp.Error != "" {
return nil, fmt.Errorf("subrepo list: %v", resp.Error)
}
var pkgs []string
for _, r := range resp.Response {
pkgs = append(pkgs, r.Path)
}
return pkgs, nil
}
var (
ticklerMu sync.Mutex
ticklers = make(map[string]chan bool)
)
// repo is the gerrit repo: e.g. "go", "net", "crypto", ...
func repoTickler(repo string) chan bool {
ticklerMu.Lock()
defer ticklerMu.Unlock()
if c, ok := ticklers[repo]; ok {
return c
}
c := make(chan bool, 1)
ticklers[repo] = c
return c
}
// pollGerritAndTickle polls Gerrit's JSON meta URL of all its URLs
// and their current branch heads. When this sees that one has
// changed, it tickles the channel for that repo and wakes up its
// poller, if its poller is in a sleep.
func pollGerritAndTickle() {
last := map[string]string{} // repo -> last seen hash
for {
for repo, hash := range gerritMetaMap() {
if hash != last[repo] {
last[repo] = hash
select {
case repoTickler(repo) <- true:
log.Printf("tickled the %s repo poller", repo)
default:
}
}
}
time.Sleep(*pollInterval)
}
}
// gerritMetaMap returns the map from repo name (e.g. "go") to its
// latest master hash.
// The returned map is nil on any transient error.
func gerritMetaMap() map[string]string {
res, err := http.Get(metaURL)
if err != nil {
return nil
}
defer res.Body.Close()
defer io.Copy(ioutil.Discard, res.Body) // ensure EOF for keep-alive
if res.StatusCode != 200 {
return nil
}
var meta map[string]struct {
Branches map[string]string
}
br := bufio.NewReader(res.Body)
// For security reasons or something, this URL starts with ")]}'\n" before
// the JSON object. So ignore that.
// Shawn Pearce says it's guaranteed to always be just one line, ending in '\n'.
for {
b, err := br.ReadByte()
if err != nil {
return nil
}
if b == '\n' {
break
}
}
if err := json.NewDecoder(br).Decode(&meta); err != nil {
log.Printf("JSON decoding error from %v: %s", metaURL, err)
return nil
}
m := map[string]string{}
for repo, v := range meta {
if master, ok := v.Branches["master"]; ok {
m[repo] = master
}
}
return m
}