2014-09-03 11:26:52 -06:00
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
2014-12-30 17:19:06 -07:00
// +build build_coordinator
2014-09-03 11:26:52 -06:00
// The coordinator runs on GCE and coordinates builds in Docker containers.
2014-12-08 21:00:58 -07:00
package main // import "golang.org/x/tools/dashboard/coordinator"
2014-09-03 11:26:52 -06:00
import (
2015-01-02 18:16:44 -07:00
"archive/tar"
2014-09-03 11:26:52 -06:00
"bytes"
2015-01-02 18:16:44 -07:00
"compress/gzip"
2014-09-03 11:26:52 -06:00
"crypto/hmac"
"crypto/md5"
2015-01-07 16:44:25 -07:00
"crypto/rand"
2014-09-03 11:26:52 -06:00
"encoding/json"
2015-01-02 18:16:44 -07:00
"errors"
2014-09-03 11:26:52 -06:00
"flag"
"fmt"
2014-12-14 17:51:42 -07:00
"html"
2014-09-03 11:26:52 -06:00
"io"
"io/ioutil"
"log"
"net/http"
2015-01-02 18:16:44 -07:00
"net/url"
2014-09-03 11:26:52 -06:00
"os"
"os/exec"
2015-01-02 18:16:44 -07:00
"path"
"regexp"
2014-09-03 11:26:52 -06:00
"sort"
2014-12-30 17:19:06 -07:00
"strconv"
2014-09-03 11:26:52 -06:00
"strings"
"sync"
"time"
2014-12-30 17:19:06 -07:00
"golang.org/x/oauth2"
"golang.org/x/oauth2/google"
2015-01-04 22:46:23 -07:00
"golang.org/x/tools/dashboard/types"
2014-12-30 17:19:06 -07:00
"google.golang.org/api/compute/v1"
"google.golang.org/cloud/compute/metadata"
2014-09-03 11:26:52 -06:00
)
// Command-line flags.
var (
	// Production flags.
	masterKeyFile  = flag.String("masterkey", "", "Path to builder master key. Else fetched using GCE project attribute 'builder-master-key'.")
	maxLocalBuilds = flag.Int("maxbuilds", 6, "Max concurrent Docker builds (VM builds don't count)")
	cleanZones     = flag.String("zones", "us-central1-a,us-central1-b,us-central1-f", "Comma-separated list of zones to periodically clean of stale build VMs (ones that failed to shut themselves down)")

	// Debug flags.
	addTemp = flag.Bool("temp", false, "Append -temp to all builders.")
	just    = flag.String("just", "", "If non-empty, run single build in the foreground. Requires rev.")
	rev     = flag.String("rev", "", "Revision to build.")
)
var (
startTime = time . Now ( )
2015-01-02 18:16:44 -07:00
builders = map [ string ] buildConfig { } // populated at startup, keys like "openbsd-amd64-56"
watchers = map [ string ] watchConfig { } // populated at startup, keyed by repo, e.g. "https://go.googlesource.com/go"
2014-09-03 11:26:52 -06:00
donec = make ( chan builderRev ) // reports of finished builders
2015-01-02 16:00:54 -07:00
statusMu sync . Mutex // guards both status (ongoing ones) and statusDone (just finished)
status = map [ builderRev ] * buildStatus { }
statusDone [ ] * buildStatus // finished recently, capped to maxStatusDone
2014-09-03 11:26:52 -06:00
)
2015-01-07 16:44:25 -07:00
const (
	// maxStatusDone caps how many recently finished builds are kept
	// in statusDone.
	maxStatusDone = 30

	// vmDeleteTimeout is how long before we delete a VM.
	//
	// In practice it only needs to be as long as the slowest builder
	// (currently plan9): on startup this program already deletes all
	// buildlets it doesn't know about, i.e. ones left behind by a
	// previous instance of the coordinator.
	vmDeleteTimeout = 45 * time.Minute
)
2015-01-02 16:00:54 -07:00
2015-01-02 18:16:44 -07:00
// GCE state. All of these are set by initGCE and keep their zero
// values if it fails.
var (
	projectID      string           // GCE project ID
	projectZone    string           // short zone name, e.g. "us-central1-a"
	computeService *compute.Service // Compute API client
)
// initGCE populates projectID, projectZone and computeService from the
// GCE metadata service.
//
// It returns an error (and leaves computeService nil) when the process
// is not running on GCE, when the project ID or zone cannot be
// determined, when the instance lacks the Compute scope, or when the
// Compute API client cannot be constructed.
func initGCE() error {
	if !metadata.OnGCE() {
		return errors.New("not running on GCE; VM support disabled")
	}

	var err error
	projectID, err = metadata.ProjectID()
	if err != nil {
		return fmt.Errorf("failed to get current GCE ProjectID: %v", err)
	}

	projectZone, err = metadata.Get("instance/zone")
	if err != nil {
		return fmt.Errorf("failed to get current GCE zone: %v", err)
	}
	if projectZone == "" {
		// Reported separately so the message doesn't end in "<nil>".
		return errors.New("failed to get current GCE zone: metadata returned an empty zone")
	}
	// Convert the zone from "projects/1234/zones/us-central1-a" to "us-central1-a".
	projectZone = path.Base(projectZone)

	if !hasComputeScope() {
		return errors.New("The coordinator is not running with access to read and write Compute resources. VM support disabled.")
	}

	ts := google.ComputeTokenSource("default")
	svc, err := compute.New(oauth2.NewClient(oauth2.NoContext, ts))
	if err != nil {
		// Don't publish a half-initialized client; callers treat a nil
		// computeService as "VM support disabled".
		return fmt.Errorf("failed to create Compute API client: %v", err)
	}
	computeService = svc
	return nil
}
2014-09-03 11:26:52 -06:00
// imageInfo describes a Docker image tarball and the Last-Modified
// value seen when it was last loaded.
type imageInfo struct {
	url string // location of the image tar file

	mu      sync.Mutex // held by condUpdateImage while it checks and updates lastMod
	lastMod string     // Last-Modified header of the most recently loaded tarball
}
var images = map [ string ] * imageInfo {
2014-12-01 10:59:02 -07:00
"go-commit-watcher" : { url : "https://storage.googleapis.com/go-builder-data/docker-commit-watcher.tar.gz" } ,
2014-09-09 11:54:11 -06:00
"gobuilders/linux-x86-base" : { url : "https://storage.googleapis.com/go-builder-data/docker-linux.base.tar.gz" } ,
2014-10-21 16:33:04 -06:00
"gobuilders/linux-x86-clang" : { url : "https://storage.googleapis.com/go-builder-data/docker-linux.clang.tar.gz" } ,
2014-09-09 11:54:11 -06:00
"gobuilders/linux-x86-gccgo" : { url : "https://storage.googleapis.com/go-builder-data/docker-linux.gccgo.tar.gz" } ,
"gobuilders/linux-x86-nacl" : { url : "https://storage.googleapis.com/go-builder-data/docker-linux.nacl.tar.gz" } ,
2014-09-26 16:02:51 -06:00
"gobuilders/linux-x86-sid" : { url : "https://storage.googleapis.com/go-builder-data/docker-linux.sid.tar.gz" } ,
2014-09-03 11:26:52 -06:00
}
2015-01-02 18:16:44 -07:00
// A buildConfig describes how to run a single builder, either in a
// Docker container or in a GCE VM. A config with a non-empty vmImage
// is VM-based; otherwise it is Docker-based.
type buildConfig struct {
	name string // builder name, e.g. "linux-amd64-race"

	// VM-specific settings (used if vmImage != "").
	vmImage     string // e.g. "openbsd-amd64-56"
	machineType string // optional GCE instance type

	// Docker-specific settings (used if vmImage == "").
	image   string   // Docker image to use to build
	cmd     string   // optional -cmd flag (relative to go/src/)
	env     []string // extra environment ("key=value") pairs
	dashURL string   // url of the build dashboard
	tool    string   // the tool this configuration is for
}

// usesDocker reports whether the build runs in a Docker container.
func (c *buildConfig) usesDocker() bool {
	return c.vmImage == ""
}

// usesVM reports whether the build runs in a VM.
func (c *buildConfig) usesVM() bool {
	return c.vmImage != ""
}

// MachineType returns the configured GCE machine type, falling back
// to "n1-highcpu-4" when none is set.
func (c *buildConfig) MachineType() string {
	if c.machineType == "" {
		return "n1-highcpu-4"
	}
	return c.machineType
}
// recordResult POSTs the outcome of a build of hash to the dashboard's
// "result" endpoint, authenticated with this builder's key.
// It returns whatever error dash reports.
func (c *buildConfig) recordResult(ok bool, hash, buildLog string, runTime time.Duration) error {
	query := url.Values{
		"key":     {builderKey(c.name)},
		"builder": {c.name},
	}
	payload := map[string]interface{}{
		"Builder":     c.name,
		"PackagePath": "",
		"Hash":        hash,
		"GoHash":      "",
		"OK":          ok,
		"Log":         buildLog,
		"RunTime":     runTime,
	}
	return dash("POST", "result", query, payload, nil)
}
2014-12-01 10:59:02 -07:00
// A watchConfig describes one commit watcher: which repository it
// polls, which dashboard it reports to, and how often.
type watchConfig struct {
	repo     string        // e.g. "https://go.googlesource.com/go"
	dash     string        // e.g. "https://build.golang.org/" (must end in /)
	interval time.Duration // polling interval
}
2014-09-03 11:26:52 -06:00
func main ( ) {
flag . Parse ( )
2015-01-02 18:16:44 -07:00
if err := initGCE ( ) ; err != nil {
log . Printf ( "VM support disabled due to error initializing GCE: %v" , err )
}
2014-09-03 11:26:52 -06:00
addBuilder ( buildConfig { name : "linux-386" } )
addBuilder ( buildConfig { name : "linux-386-387" , env : [ ] string { "GO386=387" } } )
addBuilder ( buildConfig { name : "linux-amd64" } )
2014-09-04 18:48:56 -06:00
addBuilder ( buildConfig { name : "linux-amd64-nocgo" , env : [ ] string { "CGO_ENABLED=0" , "USER=root" } } )
2014-09-24 13:54:59 -06:00
addBuilder ( buildConfig { name : "linux-amd64-noopt" , env : [ ] string { "GO_GCFLAGS=-N -l" } } )
2014-09-03 11:26:52 -06:00
addBuilder ( buildConfig { name : "linux-amd64-race" } )
addBuilder ( buildConfig { name : "nacl-386" } )
addBuilder ( buildConfig { name : "nacl-amd64p32" } )
2014-09-09 11:54:11 -06:00
addBuilder ( buildConfig {
2014-10-09 15:04:49 -06:00
name : "linux-amd64-gccgo" ,
image : "gobuilders/linux-x86-gccgo" ,
cmd : "make RUNTESTFLAGS=\"--target_board=unix/-m64\" check-go -j16" ,
dashURL : "https://build.golang.org/gccgo" ,
tool : "gccgo" ,
} )
addBuilder ( buildConfig {
name : "linux-386-gccgo" ,
image : "gobuilders/linux-x86-gccgo" ,
cmd : "make RUNTESTFLAGS=\"--target_board=unix/-m32\" check-go -j16" ,
dashURL : "https://build.golang.org/gccgo" ,
tool : "gccgo" ,
2014-09-09 11:54:11 -06:00
} )
2014-09-26 16:02:51 -06:00
addBuilder ( buildConfig { name : "linux-386-sid" , image : "gobuilders/linux-x86-sid" } )
addBuilder ( buildConfig { name : "linux-amd64-sid" , image : "gobuilders/linux-x86-sid" } )
2014-10-01 12:57:02 -06:00
addBuilder ( buildConfig { name : "linux-386-clang" , image : "gobuilders/linux-x86-clang" } )
addBuilder ( buildConfig { name : "linux-amd64-clang" , image : "gobuilders/linux-x86-clang" } )
2014-09-03 11:26:52 -06:00
2015-01-02 18:16:44 -07:00
// VMs:
2015-01-07 16:44:25 -07:00
addBuilder ( buildConfig {
name : "openbsd-amd64-gce56" ,
vmImage : "openbsd-amd64-56" ,
machineType : "n1-highcpu-2" ,
} )
addBuilder ( buildConfig {
name : "plan9-386-gce" ,
vmImage : "plan9-386" ,
// We *were* using n1-standard-1 because Plan 9 can only
// reliably use a single CPU. Using 2 or 4 and we see
// test failures. See:
// https://golang.org/issue/8393
// https://golang.org/issue/9491
// n1-standard-1 has 3.6 GB of memory which is
// overkill (userspace probably only sees 2GB anyway),
// but it's the cheapest option. And plenty to keep
// our ~250 MB of inputs+outputs in its ramfs.
//
// But the docs says "For the n1 series of machine
// types, a virtual CPU is implemented as a single
// hyperthread on a 2.6GHz Intel Sandy Bridge Xeon or
// Intel Ivy Bridge Xeon (or newer) processor. This
// means that the n1-standard-2 machine type will see
// a whole physical core."
//
// ... so we use n1-highcpu-2 (1.80 RAM, still
// plenty), just so we can get 1 whole core for the
// single-core Plan 9. It will see 2 virtual cores and
// only use 1, but we hope that 1 will be more powerful
// and we'll stop timing out on tests.
machineType : "n1-highcpu-2" ,
} )
2015-01-02 18:16:44 -07:00
2014-12-11 19:19:15 -07:00
addWatcher ( watchConfig { repo : "https://go.googlesource.com/go" , dash : "https://build.golang.org/" } )
// TODO(adg,cmang): fix gccgo watcher
// addWatcher(watchConfig{repo: "https://code.google.com/p/gofrontend", dash: "https://build.golang.org/gccgo/"})
2014-12-01 10:59:02 -07:00
2014-09-03 11:26:52 -06:00
if ( * just != "" ) != ( * rev != "" ) {
log . Fatalf ( "--just and --rev must be used together" )
}
if * just != "" {
conf , ok := builders [ * just ]
if ! ok {
log . Fatalf ( "unknown builder %q" , * just )
}
cmd := exec . Command ( "docker" , append ( [ ] string { "run" } , conf . dockerRunArgs ( * rev ) ... ) ... )
cmd . Stdout = os . Stdout
cmd . Stderr = os . Stderr
if err := cmd . Run ( ) ; err != nil {
log . Fatalf ( "Build failed: %v" , err )
}
return
}
http . HandleFunc ( "/" , handleStatus )
http . HandleFunc ( "/logs" , handleLogs )
go http . ListenAndServe ( ":80" , nil )
2014-12-14 17:51:42 -07:00
go cleanUpOldContainers ( )
2014-12-30 17:19:06 -07:00
go cleanUpOldVMs ( )
2014-12-14 17:51:42 -07:00
2015-01-02 18:16:44 -07:00
stopWatchers ( ) // clean up before we start new ones
2014-12-01 10:59:02 -07:00
for _ , watcher := range watchers {
if err := startWatching ( watchers [ watcher . repo ] ) ; err != nil {
log . Printf ( "Error starting watcher for %s: %v" , watcher . repo , err )
}
}
2014-09-03 11:26:52 -06:00
workc := make ( chan builderRev )
2015-01-04 22:46:23 -07:00
go findWorkLoop ( workc )
// TODO(cmang): gccgo will need its own findWorkLoop
2014-09-03 11:26:52 -06:00
ticker := time . NewTicker ( 1 * time . Minute )
for {
select {
case work := <- workc :
2015-01-02 18:16:44 -07:00
log . Printf ( "workc received %+v; len(status) = %v, maxLocalBuilds = %v; cur = %p" , work , len ( status ) , * maxLocalBuilds , status [ work ] )
if mayBuildRev ( work ) {
conf := builders [ work . name ]
if st , err := startBuilding ( conf , work . rev ) ; err == nil {
2014-09-03 11:26:52 -06:00
setStatus ( work , st )
} else {
log . Printf ( "Error starting to build %v: %v" , work , err )
}
}
case done := <- donec :
log . Printf ( "%v done" , done )
2015-01-02 16:00:54 -07:00
markDone ( done )
2014-09-03 11:26:52 -06:00
case <- ticker . C :
if numCurrentBuilds ( ) == 0 && time . Now ( ) . After ( startTime . Add ( 10 * time . Minute ) ) {
// TODO: halt the whole machine to kill the VM or something
}
}
}
}
// numCurrentBuilds returns the number of entries in status, i.e. the
// builds that have been started and not yet marked done.
func numCurrentBuilds() int {
	statusMu.Lock()
	n := len(status)
	statusMu.Unlock()
	return n
}
2015-01-04 22:46:23 -07:00
// isBuilding reports whether work currently has an entry in status.
func isBuilding(work builderRev) bool {
	statusMu.Lock()
	_, found := status[work]
	statusMu.Unlock()
	return found
}
2015-01-02 18:16:44 -07:00
// mayBuildRev reports whether the build type & revision should be started.
// It returns true if it's not already building, and there is capacity.
2014-09-03 11:26:52 -06:00
func mayBuildRev ( work builderRev ) bool {
2015-01-02 18:16:44 -07:00
conf := builders [ work . name ]
2014-09-03 11:26:52 -06:00
statusMu . Lock ( )
2015-01-02 18:16:44 -07:00
_ , building := status [ work ]
statusMu . Unlock ( )
if building {
return false
}
if conf . usesVM ( ) {
// These don't count towards *maxLocalBuilds.
return true
}
numDocker , err := numDockerBuilds ( )
if err != nil {
log . Printf ( "not starting %v due to docker ps failure: %v" , work , err )
return false
}
return numDocker < * maxLocalBuilds
2014-09-03 11:26:52 -06:00
}
func setStatus ( work builderRev , st * buildStatus ) {
statusMu . Lock ( )
defer statusMu . Unlock ( )
2015-01-02 16:00:54 -07:00
status [ work ] = st
}
func markDone ( work builderRev ) {
statusMu . Lock ( )
defer statusMu . Unlock ( )
st , ok := status [ work ]
if ! ok {
return
2014-09-03 11:26:52 -06:00
}
2015-01-02 16:00:54 -07:00
delete ( status , work )
if len ( statusDone ) == maxStatusDone {
copy ( statusDone , statusDone [ 1 : ] )
statusDone = statusDone [ : len ( statusDone ) - 1 ]
}
statusDone = append ( statusDone , st )
2014-09-03 11:26:52 -06:00
}
2015-01-07 16:44:25 -07:00
// vmIsBuilding reports whether any in-progress build is using the VM
// instance named instName. An empty name is logged as bogus and
// reported as not building.
func vmIsBuilding(instName string) bool {
	if instName == "" {
		log.Printf("bogus empty instance name passed to vmIsBuilding")
		return false
	}

	statusMu.Lock()
	defer statusMu.Unlock()

	inUse := false
	for _, st := range status {
		if st.instName == instName {
			inUse = true
			break
		}
	}
	return inUse
}
2015-01-02 18:16:44 -07:00
// statusPtrStr disambiguates which status to return if there are
// multiple in the history (e.g. recent failures where the build
// didn't finish for reasons outside of all.bash failing)
func getStatus ( work builderRev , statusPtrStr string ) * buildStatus {
2014-09-03 11:26:52 -06:00
statusMu . Lock ( )
defer statusMu . Unlock ( )
2015-01-02 18:16:44 -07:00
match := func ( st * buildStatus ) bool {
return statusPtrStr == "" || fmt . Sprintf ( "%p" , st ) == statusPtrStr
}
if st , ok := status [ work ] ; ok && match ( st ) {
2015-01-02 16:00:54 -07:00
return st
}
for _ , st := range statusDone {
2015-01-02 18:16:44 -07:00
if st . builderRev == work && match ( st ) {
2015-01-02 16:00:54 -07:00
return st
}
}
return nil
2014-09-03 11:26:52 -06:00
}
// byAge implements sort.Interface, ordering build statuses by start
// time, earliest first.
type byAge []*buildStatus

// Len returns the number of statuses.
func (a byAge) Len() int {
	return len(a)
}

// Less reports whether status i started before status j.
func (a byAge) Less(i, j int) bool {
	first, second := a[i], a[j]
	return first.start.Before(second.start)
}

// Swap exchanges statuses i and j.
func (a byAge) Swap(i, j int) {
	a[j], a[i] = a[i], a[j]
}
func handleStatus ( w http . ResponseWriter , r * http . Request ) {
var active [ ] * buildStatus
2015-01-02 16:00:54 -07:00
var recent [ ] * buildStatus
2014-09-03 11:26:52 -06:00
statusMu . Lock ( )
for _ , st := range status {
active = append ( active , st )
}
2015-01-02 16:00:54 -07:00
recent = append ( recent , statusDone ... )
2015-01-02 18:16:44 -07:00
numTotal := len ( status )
numDocker , err := numDockerBuilds ( )
2014-09-03 11:26:52 -06:00
statusMu . Unlock ( )
sort . Sort ( byAge ( active ) )
2015-01-02 18:16:44 -07:00
sort . Sort ( sort . Reverse ( byAge ( recent ) ) )
2015-01-02 16:00:54 -07:00
io . WriteString ( w , "<html><body><h1>Go build coordinator</h1>" )
2015-01-02 18:16:44 -07:00
if err != nil {
fmt . Fprintf ( w , "<h2>Error</h2>Error fetching Docker build count: <i>%s</i>\n" , html . EscapeString ( err . Error ( ) ) )
}
fmt . Fprintf ( w , "<h2>running</h2><p>%d total builds active (Docker: %d/%d; VMs: %d/∞):" ,
numTotal , numDocker , * maxLocalBuilds , numTotal - numDocker )
io . WriteString ( w , "<pre>" )
2014-09-03 11:26:52 -06:00
for _ , st := range active {
2015-01-02 16:00:54 -07:00
io . WriteString ( w , st . htmlStatusLine ( ) )
2014-09-03 11:26:52 -06:00
}
2015-01-02 16:00:54 -07:00
io . WriteString ( w , "</pre>" )
io . WriteString ( w , "<h2>recently completed</h2><pre>" )
for _ , st := range recent {
io . WriteString ( w , st . htmlStatusLine ( ) )
}
io . WriteString ( w , "</pre>" )
fmt . Fprintf ( w , "<h2>disk space</h2><pre>%s</pre></body></html>" , html . EscapeString ( diskFree ( ) ) )
2014-12-14 17:51:42 -07:00
}
// diskFree returns the standard output of "df -h". The command's
// error is deliberately ignored: whatever output was produced
// (possibly none) is returned as-is.
func diskFree() string {
	df := exec.Command("df", "-h")
	report, _ := df.Output()
	return string(report)
}
func handleLogs ( w http . ResponseWriter , r * http . Request ) {
2015-01-02 18:16:44 -07:00
st := getStatus ( builderRev { r . FormValue ( "name" ) , r . FormValue ( "rev" ) } , r . FormValue ( "st" ) )
2014-09-03 11:26:52 -06:00
if st == nil {
2015-01-02 16:00:54 -07:00
http . NotFound ( w , r )
2014-09-03 11:26:52 -06:00
return
}
w . Header ( ) . Set ( "Content-Type" , "text/plain; charset=utf-8" )
2015-01-02 16:00:54 -07:00
io . WriteString ( w , st . logs ( ) )
// TODO: if st is still building, stream them to the user with
// http.Flusher.Flush and CloseNotifier and registering interest
// of new writes with the buildStatus. Will require moving the
// BUILDERKEY scrubbing into the Write method.
2014-09-03 11:26:52 -06:00
}
2015-01-04 22:46:23 -07:00
// findWorkLoop polls http://build.golang.org/?mode=json looking for new work
// for the main dashboard. It does not support gccgo.
// TODO(bradfitz): it also currently does not support subrepos.
func findWorkLoop ( work chan <- builderRev ) {
ticker := time . NewTicker ( 15 * time . Second )
2014-09-03 11:26:52 -06:00
for {
2015-01-04 22:46:23 -07:00
if err := findWork ( work ) ; err != nil {
log . Printf ( "failed to find new work: %v" , err )
2014-09-03 11:26:52 -06:00
}
2015-01-04 22:46:23 -07:00
<- ticker . C
2014-09-03 11:26:52 -06:00
}
}
2015-01-04 22:46:23 -07:00
func findWork ( work chan <- builderRev ) error {
var bs types . BuildStatus
res , err := http . Get ( "https://build.golang.org/?mode=json" )
2014-09-03 11:26:52 -06:00
if err != nil {
2015-01-04 22:46:23 -07:00
return err
2014-09-03 11:26:52 -06:00
}
defer res . Body . Close ( )
2015-01-04 22:46:23 -07:00
if err := json . NewDecoder ( res . Body ) . Decode ( & bs ) ; err != nil {
return err
}
2014-09-03 11:26:52 -06:00
if res . StatusCode != 200 {
2015-01-04 22:46:23 -07:00
return fmt . Errorf ( "unexpected http status %v" , res . Status )
}
knownToDashboard := map [ string ] bool { } // keys are builder
for _ , b := range bs . Builders {
knownToDashboard [ b ] = true
}
var goRevisions [ ] string
for _ , br := range bs . Revisions {
if br . Repo == "go" {
goRevisions = append ( goRevisions , br . Revision )
} else {
// TODO(bradfitz): support these: golang.org/issue/9506
continue
}
if len ( br . Results ) != len ( bs . Builders ) {
return errors . New ( "bogus JSON response from dashboard: results is too long." )
}
for i , res := range br . Results {
if res != "" {
// It's either "ok" or a failure URL.
continue
}
builder := bs . Builders [ i ]
if _ , ok := builders [ builder ] ; ! ok {
// Not managed by the coordinator.
continue
}
br := builderRev { bs . Builders [ i ] , br . Revision }
if ! isBuilding ( br ) {
work <- br
}
}
2014-09-03 11:26:52 -06:00
}
2015-01-04 22:46:23 -07:00
// And to bootstrap new builders, see if we have any builders
// that the dashboard doesn't know about.
for b := range builders {
if knownToDashboard [ b ] {
continue
}
for _ , rev := range goRevisions {
br := builderRev { b , rev }
if ! isBuilding ( br ) {
work <- br
}
}
2014-09-03 11:26:52 -06:00
}
2015-01-04 22:46:23 -07:00
return nil
2014-09-03 11:26:52 -06:00
}
2015-01-02 16:00:54 -07:00
// A builderRev pairs a build configuration name with a revision.
// It is comparable and is used as the key of the status map.
type builderRev struct {
	name string // builder name, e.g. "linux-amd64-race"
	rev  string // lowercase hex git hash
}
// returns the part after "docker run"
func ( conf buildConfig ) dockerRunArgs ( rev string ) ( args [ ] string ) {
if key := builderKey ( conf . name ) ; key != "" {
tmpKey := "/tmp/" + conf . name + ".buildkey"
if _ , err := os . Stat ( tmpKey ) ; err != nil {
if err := ioutil . WriteFile ( tmpKey , [ ] byte ( key ) , 0600 ) ; err != nil {
log . Fatal ( err )
}
}
2014-11-09 19:22:35 -07:00
// Images may look for .gobuildkey in / or /root, so provide both.
// TODO(adg): fix images that look in the wrong place.
2014-09-03 11:26:52 -06:00
args = append ( args , "-v" , tmpKey + ":/.gobuildkey" )
2014-11-09 19:22:35 -07:00
args = append ( args , "-v" , tmpKey + ":/root/.gobuildkey" )
2014-09-03 11:26:52 -06:00
}
for _ , pair := range conf . env {
args = append ( args , "-e" , pair )
}
args = append ( args ,
conf . image ,
"/usr/local/bin/builder" ,
"-rev=" + rev ,
2014-09-09 11:54:11 -06:00
"-dashboard=" + conf . dashURL ,
"-tool=" + conf . tool ,
2014-09-26 13:21:08 -06:00
"-buildroot=/" ,
2014-09-03 11:26:52 -06:00
"-v" ,
)
if conf . cmd != "" {
args = append ( args , "-cmd" , conf . cmd )
}
args = append ( args , conf . name )
return
}
func addBuilder ( c buildConfig ) {
2015-01-02 18:16:44 -07:00
if c . tool == "gccgo" {
// TODO(cmang,bradfitz,adg): fix gccgo
return
}
2014-09-03 11:26:52 -06:00
if c . name == "" {
panic ( "empty name" )
}
if * addTemp {
c . name += "-temp"
}
if _ , dup := builders [ c . name ] ; dup {
panic ( "dup name" )
}
2014-09-09 11:54:11 -06:00
if c . dashURL == "" {
c . dashURL = "https://build.golang.org"
}
if c . tool == "" {
c . tool = "go"
}
2014-09-03 11:26:52 -06:00
if strings . HasPrefix ( c . name , "nacl-" ) {
if c . image == "" {
c . image = "gobuilders/linux-x86-nacl"
}
if c . cmd == "" {
c . cmd = "/usr/local/bin/build-command.pl"
}
}
if strings . HasPrefix ( c . name , "linux-" ) && c . image == "" {
c . image = "gobuilders/linux-x86-base"
}
2015-01-02 18:16:44 -07:00
if c . image == "" && c . vmImage == "" {
panic ( "empty image and vmImage" )
}
if c . image != "" && c . vmImage != "" {
panic ( "can't specify both image and vmImage" )
2014-09-03 11:26:52 -06:00
}
builders [ c . name ] = c
}
2014-12-01 10:59:02 -07:00
// returns the part after "docker run"
func ( conf watchConfig ) dockerRunArgs ( ) ( args [ ] string ) {
2014-12-11 19:19:15 -07:00
log . Printf ( "Running watcher with master key %q" , masterKey ( ) )
if key := masterKey ( ) ; len ( key ) > 0 {
2014-12-01 10:59:02 -07:00
tmpKey := "/tmp/watcher.buildkey"
if _ , err := os . Stat ( tmpKey ) ; err != nil {
2014-12-11 19:19:15 -07:00
if err := ioutil . WriteFile ( tmpKey , key , 0600 ) ; err != nil {
2014-12-01 10:59:02 -07:00
log . Fatal ( err )
}
}
2014-12-11 19:19:15 -07:00
// Images may look for .gobuildkey in / or /root, so provide both.
// TODO(adg): fix images that look in the wrong place.
2014-12-01 10:59:02 -07:00
args = append ( args , "-v" , tmpKey + ":/.gobuildkey" )
2014-12-11 19:19:15 -07:00
args = append ( args , "-v" , tmpKey + ":/root/.gobuildkey" )
2014-12-01 10:59:02 -07:00
}
args = append ( args ,
"go-commit-watcher" ,
"/usr/local/bin/watcher" ,
"-repo=" + conf . repo ,
"-dash=" + conf . dash ,
"-poll=" + conf . interval . String ( ) ,
)
return
}
func addWatcher ( c watchConfig ) {
if c . repo == "" {
2014-12-11 19:19:15 -07:00
c . repo = "https://go.googlesource.com/go"
2014-12-01 10:59:02 -07:00
}
if c . dash == "" {
c . dash = "https://build.golang.org/"
}
if c . interval == 0 {
c . interval = 10 * time . Second
}
watchers [ c . repo ] = c
}
2014-09-03 11:26:52 -06:00
func condUpdateImage ( img string ) error {
ii := images [ img ]
if ii == nil {
2015-01-04 22:46:23 -07:00
return fmt . Errorf ( "image %q doesn't exist" , img )
2014-09-03 11:26:52 -06:00
}
ii . mu . Lock ( )
defer ii . mu . Unlock ( )
res , err := http . Head ( ii . url )
if err != nil {
return fmt . Errorf ( "Error checking %s: %v" , ii . url , err )
}
if res . StatusCode != 200 {
return fmt . Errorf ( "Error checking %s: %v" , ii . url , res . Status )
}
if res . Header . Get ( "Last-Modified" ) == ii . lastMod {
return nil
}
res , err = http . Get ( ii . url )
if err != nil || res . StatusCode != 200 {
return fmt . Errorf ( "Get after Head failed for %s: %v, %v" , ii . url , err , res )
}
defer res . Body . Close ( )
log . Printf ( "Running: docker load of %s\n" , ii . url )
cmd := exec . Command ( "docker" , "load" )
cmd . Stdin = res . Body
var out bytes . Buffer
cmd . Stdout = & out
cmd . Stderr = & out
if cmd . Run ( ) ; err != nil {
log . Printf ( "Failed to pull latest %s from %s and pipe into docker load: %v, %s" , img , ii . url , err , out . Bytes ( ) )
return err
}
ii . lastMod = res . Header . Get ( "Last-Modified" )
return nil
}
2015-01-02 18:16:44 -07:00
// numDockerBuilds returns the number of lines of "docker ps" output
// that mention "gobuilders/", i.e. the Go builder containers currently
// running. It returns an error if "docker ps" fails.
func numDockerBuilds() (n int, err error) {
	psOut, err := exec.Command("docker", "ps").Output()
	if err != nil {
		return 0, err
	}
	for _, row := range strings.Split(string(psOut), "\n") {
		if !strings.Contains(row, "gobuilders/") {
			continue
		}
		n++
	}
	return n, nil
}
2014-09-03 11:26:52 -06:00
func startBuilding ( conf buildConfig , rev string ) ( * buildStatus , error ) {
2015-01-02 18:16:44 -07:00
if conf . usesVM ( ) {
return startBuildingInVM ( conf , rev )
} else {
return startBuildingInDocker ( conf , rev )
}
}
func startBuildingInDocker ( conf buildConfig , rev string ) ( * buildStatus , error ) {
2014-09-03 11:26:52 -06:00
if err := condUpdateImage ( conf . image ) ; err != nil {
log . Printf ( "Failed to setup container for %v %v: %v" , conf . name , rev , err )
return nil , err
}
cmd := exec . Command ( "docker" , append ( [ ] string { "run" , "-d" } , conf . dockerRunArgs ( rev ) ... ) ... )
all , err := cmd . CombinedOutput ( )
log . Printf ( "Docker run for %v %v = err:%v, output:%s" , conf . name , rev , err , all )
if err != nil {
return nil , err
}
container := strings . TrimSpace ( string ( all ) )
2015-01-02 18:16:44 -07:00
brev := builderRev {
name : conf . name ,
rev : rev ,
2015-01-02 16:00:54 -07:00
}
2015-01-02 18:16:44 -07:00
st := & buildStatus {
builderRev : brev ,
container : container ,
start : time . Now ( ) ,
}
log . Printf ( "%v now building in Docker container %v" , brev , st . container )
2015-01-02 16:00:54 -07:00
go func ( ) {
all , err := exec . Command ( "docker" , "wait" , container ) . CombinedOutput ( )
output := strings . TrimSpace ( string ( all ) )
var ok bool
if err == nil {
exit , err := strconv . Atoi ( output )
ok = ( err == nil && exit == 0 )
}
2015-01-02 18:16:44 -07:00
st . setDone ( ok )
2015-01-02 16:00:54 -07:00
log . Printf ( "docker wait %s/%s: %v, %s" , container , rev , err , output )
donec <- builderRev { conf . name , rev }
exec . Command ( "docker" , "rm" , container ) . Run ( )
} ( )
go func ( ) {
cmd := exec . Command ( "docker" , "logs" , "-f" , container )
2015-01-02 18:16:44 -07:00
cmd . Stdout = st
cmd . Stderr = st
2015-01-02 16:00:54 -07:00
if err := cmd . Run ( ) ; err != nil {
// The docker logs subcommand always returns
// success, even if the underlying process
// fails.
log . Printf ( "failed to follow docker logs of %s: %v" , container , err )
}
} ( )
2015-01-02 18:16:44 -07:00
return st , nil
}
// osArchRx captures the leading "GOOS-GOARCH" portion of a builder
// name, e.g. "openbsd-amd64" from "openbsd-amd64-gce56". The pattern
// must not contain any whitespace: a space before ^ or after the group
// would be matched literally and no builder name would ever match.
var osArchRx = regexp.MustCompile(`^(\w+-\w+)`)
2015-01-07 16:44:25 -07:00
// randHex returns a string of exactly n random lowercase hexadecimal
// digits read from crypto/rand. It returns "" if n <= 0 and panics if
// the system's source of randomness fails.
func randHex(n int) string {
	if n <= 0 {
		return ""
	}
	// Each byte yields two hex digits; round up so that an odd n is
	// covered, then trim the extra digit below.
	buf := make([]byte, (n+1)/2)
	if _, err := rand.Read(buf); err != nil {
		panic("Failed to get randomness: " + err.Error())
	}
	return fmt.Sprintf("%x", buf)[:n]
}
2015-01-02 18:16:44 -07:00
// startBuildingInVM starts a VM on GCE running the buildlet binary to build rev.
func startBuildingInVM ( conf buildConfig , rev string ) ( * buildStatus , error ) {
brev := builderRev {
name : conf . name ,
rev : rev ,
}
st := & buildStatus {
builderRev : brev ,
start : time . Now ( ) ,
}
// name is the project-wide unique name of the GCE instance. It can't be longer
// than 61 bytes, so we only use the first 8 bytes of the rev.
2015-01-07 16:44:25 -07:00
name := "buildlet-" + conf . name + "-" + rev [ : 8 ] + "-rn" + randHex ( 6 )
2015-01-02 18:16:44 -07:00
// buildletURL is the URL of the buildlet binary which the VMs
// are configured to download at boot and run. This lets us
// update the buildlet more easily than rebuilding the whole
// VM image. We put this URL in a well-known GCE metadata attribute.
// The value will be of the form:
// http://storage.googleapis.com/go-builder-data/buildlet.GOOS-GOARCH
m := osArchRx . FindStringSubmatch ( conf . name )
if m == nil {
return nil , fmt . Errorf ( "invalid builder name %q" , conf . name )
}
buildletURL := "http://storage.googleapis.com/go-builder-data/buildlet." + m [ 1 ]
prefix := "https://www.googleapis.com/compute/v1/projects/" + projectID
machType := prefix + "/zones/" + projectZone + "/machineTypes/" + conf . MachineType ( )
instance := & compute . Instance {
Name : name ,
Description : fmt . Sprintf ( "Go Builder building %s %s" , conf . name , rev ) ,
MachineType : machType ,
Disks : [ ] * compute . AttachedDisk {
{
AutoDelete : true ,
Boot : true ,
Type : "PERSISTENT" ,
InitializeParams : & compute . AttachedDiskInitializeParams {
DiskName : name ,
SourceImage : "https://www.googleapis.com/compute/v1/projects/" + projectID + "/global/images/" + conf . vmImage ,
DiskType : "https://www.googleapis.com/compute/v1/projects/" + projectID + "/zones/" + projectZone + "/diskTypes/pd-ssd" ,
} ,
} ,
} ,
Tags : & compute . Tags {
// Warning: do NOT list "http-server" or "allow-ssh" (our
// project's custom tag to allow ssh access) here; the
// buildlet provides full remote code execution.
Items : [ ] string { } ,
} ,
Metadata : & compute . Metadata {
Items : [ ] * compute . MetadataItems {
{
Key : "buildlet-binary-url" ,
Value : buildletURL ,
} ,
// In case the VM gets away from us (generally: if the
// coordinator dies while a build is running), then we
// set this attribute of when it should be killed so
// we can kill it later when the coordinator is
// restarted. The cleanUpOldVMs goroutine loop handles
// that killing.
{
Key : "delete-at" ,
2015-01-07 16:44:25 -07:00
Value : fmt . Sprint ( time . Now ( ) . Add ( vmDeleteTimeout ) . Unix ( ) ) ,
2015-01-02 18:16:44 -07:00
} ,
} ,
} ,
NetworkInterfaces : [ ] * compute . NetworkInterface {
& compute . NetworkInterface {
AccessConfigs : [ ] * compute . AccessConfig {
& compute . AccessConfig {
Type : "ONE_TO_ONE_NAT" ,
Name : "External NAT" ,
} ,
} ,
Network : prefix + "/global/networks/default" ,
} ,
} ,
}
op , err := computeService . Instances . Insert ( projectID , projectZone , instance ) . Do ( )
if err != nil {
return nil , fmt . Errorf ( "Failed to create instance: %v" , err )
}
st . createOp = op . Name
st . instName = name
log . Printf ( "%v now building in VM %v" , brev , st . instName )
// Start the goroutine to monitor the VM now that it's booting. This might
// take minutes for it to come up, and then even more time to do the build.
go func ( ) {
err := watchVM ( st )
2015-01-04 22:46:23 -07:00
if st . hasEvent ( "instance_created" ) {
deleteVM ( projectZone , st . instName )
}
2015-01-02 18:16:44 -07:00
st . setDone ( err == nil )
if err != nil {
fmt . Fprintf ( st , "\n\nError: %v\n" , err )
}
donec <- builderRev { conf . name , rev }
} ( )
return st , nil
}
// watchVM monitors a VM doing a build.
2015-01-07 16:44:25 -07:00
func watchVM ( st * buildStatus ) ( retErr error ) {
2015-01-02 18:16:44 -07:00
goodRes := func ( res * http . Response , err error , what string ) bool {
if err != nil {
2015-01-07 16:44:25 -07:00
retErr = fmt . Errorf ( "%s: %v" , what , err )
2015-01-02 18:16:44 -07:00
return false
}
if res . StatusCode / 100 != 2 {
2015-01-07 16:44:25 -07:00
slurp , _ := ioutil . ReadAll ( io . LimitReader ( res . Body , 4 << 10 ) )
retErr = fmt . Errorf ( "%s: %v; body: %s" , what , res . Status , slurp )
res . Body . Close ( )
2015-01-02 18:16:44 -07:00
return false
}
return true
}
st . logEventTime ( "instance_create_requested" )
// Wait for instance create operation to succeed.
OpLoop :
for {
time . Sleep ( 2 * time . Second )
op , err := computeService . ZoneOperations . Get ( projectID , projectZone , st . createOp ) . Do ( )
if err != nil {
return fmt . Errorf ( "Failed to get op %s: %v" , st . createOp , err )
}
switch op . Status {
case "PENDING" , "RUNNING" :
continue
case "DONE" :
if op . Error != nil {
for _ , operr := range op . Error . Errors {
return fmt . Errorf ( "Error creating instance: %+v" , operr )
}
return errors . New ( "Failed to start." )
}
break OpLoop
default :
log . Fatalf ( "Unknown status %q: %+v" , op . Status , op )
}
}
st . logEventTime ( "instance_created" )
inst , err := computeService . Instances . Get ( projectID , projectZone , st . instName ) . Do ( )
if err != nil {
return fmt . Errorf ( "Error getting instance %s details after creation: %v" , st . instName , err )
}
st . logEventTime ( "got_instance_info" )
// Find its internal IP.
var ip string
for _ , iface := range inst . NetworkInterfaces {
if strings . HasPrefix ( iface . NetworkIP , "10." ) {
ip = iface . NetworkIP
}
}
if ip == "" {
return errors . New ( "didn't find its internal IP address" )
}
// Wait for it to boot and its buildlet to come up on port 80.
st . logEventTime ( "waiting_for_buildlet" )
buildletURL := "http://" + ip
const numTries = 60
var alive bool
for i := 1 ; i <= numTries ; i ++ {
res , err := http . Get ( buildletURL )
if err != nil {
time . Sleep ( 1 * time . Second )
continue
}
res . Body . Close ( )
if res . StatusCode != 200 {
return fmt . Errorf ( "buildlet returned HTTP status code %d on try number %d" , res . StatusCode , i )
}
st . logEventTime ( "buildlet_up" )
alive = true
break
}
if ! alive {
return fmt . Errorf ( "buildlet didn't come up in %d seconds" , numTries )
}
// Write the VERSION file.
st . logEventTime ( "start_write_version_tar" )
verReq , err := http . NewRequest ( "PUT" , buildletURL + "/writetgz" , versionTgz ( st . rev ) )
if err != nil {
return err
}
verRes , err := http . DefaultClient . Do ( verReq )
if ! goodRes ( verRes , err , "writing VERSION tgz" ) {
return
}
// Feed the buildlet a tar file for it to extract.
// TODO: cache these.
st . logEventTime ( "start_fetch_gerrit_tgz" )
tarRes , err := http . Get ( "https://go.googlesource.com/go/+archive/" + st . rev + ".tar.gz" )
if ! goodRes ( tarRes , err , "fetching tarball from Gerrit" ) {
return
}
st . logEventTime ( "start_write_tar" )
putReq , err := http . NewRequest ( "PUT" , buildletURL + "/writetgz" , tarRes . Body )
if err != nil {
tarRes . Body . Close ( )
return err
}
putRes , err := http . DefaultClient . Do ( putReq )
st . logEventTime ( "end_write_tar" )
tarRes . Body . Close ( )
if ! goodRes ( putRes , err , "writing tarball to buildlet" ) {
return
}
// Run the builder
cmd := "all.bash"
if strings . HasPrefix ( st . name , "windows-" ) {
cmd = "all.bat"
} else if strings . HasPrefix ( st . name , "plan9-" ) {
cmd = "all.rc"
}
execStartTime := time . Now ( )
st . logEventTime ( "start_exec" )
res , err := http . PostForm ( buildletURL + "/exec" , url . Values { "cmd" : { "src/" + cmd } } )
if ! goodRes ( res , err , "running " + cmd ) {
return
}
defer res . Body . Close ( )
st . logEventTime ( "running_exec" )
// Stream the output:
if _ , err := io . Copy ( st , res . Body ) ; err != nil {
return fmt . Errorf ( "error copying response: %v" , err )
}
st . logEventTime ( "done" )
// Don't record to the dashboard unless we heard the trailer from
// the buildlet, otherwise it was probably some unrelated error
// (like the VM being killed, or the buildlet crashing due to
// e.g. https://golang.org/issue/9309, since we require a tip
// build of the buildlet to get Trailers support)
2015-01-07 20:49:00 -07:00
state := res . Trailer . Get ( "Process-State" )
if state == "" {
return errors . New ( "missing Process-State trailer from HTTP response; buildlet built with old (<= 1.4) Go?" )
2015-01-02 18:16:44 -07:00
}
2015-01-07 20:49:00 -07:00
conf := builders [ st . name ]
var log string
if state != "ok" {
log = st . logs ( )
}
if err := conf . recordResult ( state == "ok" , st . rev , log , time . Since ( execStartTime ) ) ; err != nil {
return fmt . Errorf ( "Status was %q but failed to report it to the dashboard: %v" , state , err )
}
if state != "ok" {
return fmt . Errorf ( "%s failed: %v" , cmd , state )
2015-01-02 18:16:44 -07:00
}
return nil
}
// eventAndTime pairs the name of a build event with the moment it was
// recorded.
type eventAndTime struct {
	evt string    // event name, e.g. "instance_created"
	t   time.Time // when the event was logged
}
2015-01-02 16:00:54 -07:00
// buildStatus is the status of a build.
2014-09-03 11:26:52 -06:00
type buildStatus struct {
2015-01-02 16:00:54 -07:00
// Immutable:
2014-09-03 11:26:52 -06:00
builderRev
start time . Time
2015-01-02 16:00:54 -07:00
container string // container ID for docker, else it's a VM
2015-01-02 18:16:44 -07:00
// Immutable, used by VM only:
createOp string // Instances.Insert operation name
instName string
2015-01-02 16:00:54 -07:00
mu sync . Mutex // guards following
done time . Time // finished running
succeeded bool // set when done
output bytes . Buffer // stdout and stderr
2015-01-02 18:16:44 -07:00
events [ ] eventAndTime
2015-01-02 16:00:54 -07:00
}
2014-09-03 11:26:52 -06:00
2015-01-02 16:00:54 -07:00
// setDone marks the build as finished now and records whether it
// succeeded.
func (st *buildStatus) setDone(succeeded bool) {
	st.mu.Lock()
	st.succeeded, st.done = succeeded, time.Now()
	st.mu.Unlock()
}
2015-01-02 18:16:44 -07:00
// logEventTime appends event, stamped with the current time, to the
// build's event list.
func (st *buildStatus) logEventTime(event string) {
	st.mu.Lock()
	defer st.mu.Unlock()
	entry := eventAndTime{evt: event, t: time.Now()}
	st.events = append(st.events, entry)
}
2015-01-04 22:46:23 -07:00
// hasEvent reports whether an event with the given name has been
// logged for this build.
func (st *buildStatus) hasEvent(event string) bool {
	st.mu.Lock()
	defer st.mu.Unlock()
	for i := range st.events {
		if st.events[i].evt == event {
			return true
		}
	}
	return false
}
2015-01-02 16:00:54 -07:00
// htmlStatusLine returns the HTML to show within the <pre> block on
// the main page's list of active builds.
func ( st * buildStatus ) htmlStatusLine ( ) string {
st . mu . Lock ( )
defer st . mu . Unlock ( )
urlPrefix := "https://go-review.googlesource.com/#/q/"
if strings . Contains ( st . name , "gccgo" ) {
urlPrefix = "https://code.google.com/p/gofrontend/source/detail?r="
}
var buf bytes . Buffer
fmt . Fprintf ( & buf , "<a href='https://github.com/golang/go/wiki/DashboardBuilders'>%s</a> rev <a href='%s%s'>%s</a>" ,
st . name , urlPrefix , st . rev , st . rev )
if st . done . IsZero ( ) {
buf . WriteString ( ", running" )
} else if st . succeeded {
buf . WriteString ( ", succeeded" )
} else {
buf . WriteString ( ", failed" )
}
2015-01-02 18:16:44 -07:00
logsURL := fmt . Sprintf ( "/logs?name=%s&rev=%s&st=%p" , st . name , st . rev , st )
2015-01-02 16:00:54 -07:00
if st . container != "" {
2015-01-02 18:16:44 -07:00
fmt . Fprintf ( & buf , " in container <a href='%s'>%s</a>" , logsURL , st . container )
} else {
fmt . Fprintf ( & buf , " in VM <a href='%s'>%s</a>" , logsURL , st . instName )
2015-01-02 16:00:54 -07:00
}
t := st . done
if t . IsZero ( ) {
t = st . start
}
fmt . Fprintf ( & buf , ", %v ago\n" , time . Since ( t ) )
2015-01-02 18:16:44 -07:00
for i , evt := range st . events {
var elapsed string
if i != 0 {
elapsed = fmt . Sprintf ( "+%0.1fs" , evt . t . Sub ( st . events [ i - 1 ] . t ) . Seconds ( ) )
}
msg := evt . evt
if msg == "running_exec" {
msg = fmt . Sprintf ( "<a href='%s'>%s</a>" , logsURL , msg )
}
fmt . Fprintf ( & buf , " %7s %v %s\n" , elapsed , evt . t . Format ( time . RFC3339 ) , msg )
}
2015-01-02 16:00:54 -07:00
return buf . String ( )
}
// logs returns the build output captured so far, with every occurrence
// of the builder's key replaced by the placeholder "BUILDERKEY" so the
// secret is not exposed to readers of the log.
func (st *buildStatus) logs() string {
	st.mu.Lock()
	logs := st.output.String()
	st.mu.Unlock()
	key := builderKey(st.name)
	if key == "" {
		// No key to redact. An empty search string would make
		// strings.Replace insert the placeholder between every rune.
		return logs
	}
	return strings.Replace(logs, key, "BUILDERKEY", -1)
}
func ( st * buildStatus ) Write ( p [ ] byte ) ( n int , err error ) {
st . mu . Lock ( )
defer st . mu . Unlock ( )
const maxBufferSize = 2 << 20 // 2MB of output is way more than we expect.
2015-01-02 18:16:44 -07:00
plen := len ( p )
2015-01-02 16:00:54 -07:00
if st . output . Len ( ) + len ( p ) > maxBufferSize {
p = p [ : maxBufferSize - st . output . Len ( ) ]
}
2015-01-02 18:16:44 -07:00
st . output . Write ( p ) // bytes.Buffer can't fail
return plen , nil
}
// stopWatchers force-removes any go-commit-watcher Docker containers
// listed by "docker ps", so they don't pile up when the coordinator
// restarts. It is best effort: failures are ignored.
func stopWatchers() {
	psOut, err := exec.Command("docker", "ps").Output()
	if err != nil {
		return
	}
	for _, row := range strings.Split(string(psOut), "\n") {
		if strings.Contains(row, "go-commit-watcher:") {
			// The first column of "docker ps" output is the container ID.
			containerID := strings.Fields(row)[0]
			exec.Command("docker", "rm", "-f", "-v", containerID).Run()
		}
	}
}
2014-12-11 19:19:15 -07:00
func startWatching ( conf watchConfig ) ( err error ) {
defer func ( ) {
if err != nil {
restartWatcherSoon ( conf )
}
} ( )
log . Printf ( "Starting watcher for %v" , conf . repo )
2014-12-01 10:59:02 -07:00
if err := condUpdateImage ( "go-commit-watcher" ) ; err != nil {
log . Printf ( "Failed to setup container for commit watcher: %v" , err )
return err
}
cmd := exec . Command ( "docker" , append ( [ ] string { "run" , "-d" } , conf . dockerRunArgs ( ) ... ) ... )
all , err := cmd . CombinedOutput ( )
2014-12-11 19:19:15 -07:00
if err != nil {
log . Printf ( "Docker run for commit watcher = err:%v, output: %s" , err , all )
return err
}
container := strings . TrimSpace ( string ( all ) )
// Start a goroutine to wait for the watcher to die.
go func ( ) {
exec . Command ( "docker" , "wait" , container ) . Run ( )
exec . Command ( "docker" , "rm" , "-v" , container ) . Run ( )
log . Printf ( "Watcher crashed. Restarting soon." )
restartWatcherSoon ( conf )
} ( )
return nil
}
func restartWatcherSoon ( conf watchConfig ) {
time . AfterFunc ( 30 * time . Second , func ( ) {
startWatching ( conf )
} )
2014-12-01 10:59:02 -07:00
}
2014-09-03 11:26:52 -06:00
// builderKey returns the hex-encoded HMAC-MD5 of the builder name,
// keyed with the master key. It returns "" when no master key is
// loaded.
func builderKey(builder string) string {
	secret := masterKey()
	if len(secret) == 0 {
		return ""
	}
	mac := hmac.New(md5.New, secret)
	mac.Write([]byte(builder)) // hash.Hash writes never fail
	return fmt.Sprintf("%x", mac.Sum(nil))
}
// masterKey returns the builder master key, running loadKey the first
// time it is called and returning the cached value afterwards.
func masterKey() []byte {
	keyOnce.Do(loadKey)
	key := masterKeyCache
	return key
}
// State behind masterKey: the key is loaded at most once and cached.
var (
	keyOnce        sync.Once // ensures loadKey runs a single time
	masterKeyCache []byte    // the loaded key, set by loadKey
)
// loadKey fills masterKeyCache with the builder master key. The key is
// read from the -masterkey file when that flag is set, and otherwise
// from the GCE project attribute "builder-master-key". Surrounding
// whitespace is trimmed. Any failure terminates the process.
func loadKey() {
	if path := *masterKeyFile; path != "" {
		data, err := ioutil.ReadFile(path)
		if err != nil {
			log.Fatal(err)
		}
		masterKeyCache = bytes.TrimSpace(data)
		return
	}

	const attrURL = "http://metadata.google.internal/computeMetadata/v1/project/attributes/builder-master-key"
	req, _ := http.NewRequest("GET", attrURL, nil)
	req.Header.Set("Metadata-Flavor", "Google")
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal("No builder master key available")
	}
	defer res.Body.Close()
	if res.StatusCode != 200 {
		log.Fatalf("No builder-master-key project attribute available.")
	}
	data, err := ioutil.ReadAll(res.Body)
	if err != nil {
		log.Fatal(err)
	}
	masterKeyCache = bytes.TrimSpace(data)
}
2014-12-14 17:51:42 -07:00
// cleanUpOldContainers loops forever, removing every exited Docker
// container and then sleeping 30 seconds between passes.
func cleanUpOldContainers() {
	for ; ; time.Sleep(30 * time.Second) {
		for _, id := range oldContainers() {
			log.Printf("Cleaning old container %v", id)
			exec.Command("docker", "rm", "-v", id).Run()
		}
	}
}
// oldContainers returns the full IDs of all exited Docker containers.
// Errors from running docker are ignored and yield an empty list.
func oldContainers() []string {
	cmd := exec.Command("docker", "ps", "-a", "--filter=status=exited", "--no-trunc", "-q")
	ids, _ := cmd.Output()
	return strings.Fields(string(ids))
}
2014-12-30 17:19:06 -07:00
// cleanUpOldVMs loops forever and periodically enumerates virtual
// machines and deletes those which have expired.
//
// A VM is considered expired if it has a "delete-at" metadata
// attribute having a unix timestamp before the current time.
//
// This is the safety mechanism to delete VMs which stray from the
// normal deleting process. VMs are created to run a single build and
// should be shut down by a controlling process. Due to various types
// of failures, they might get stranded. To prevent them from getting
// stranded and wasting resources forever, we instead set the
// "delete-at" metadata attribute on them when created to some time
// that's well beyond their expected lifetime.
func cleanUpOldVMs ( ) {
2015-01-02 18:16:44 -07:00
if computeService == nil {
2014-12-30 17:19:06 -07:00
return
}
for {
for _ , zone := range strings . Split ( * cleanZones , "," ) {
zone = strings . TrimSpace ( zone )
2015-01-02 18:16:44 -07:00
if err := cleanZoneVMs ( zone ) ; err != nil {
2014-12-30 17:19:06 -07:00
log . Printf ( "Error cleaning VMs in zone %q: %v" , zone , err )
}
}
time . Sleep ( time . Minute )
}
}
// cleanZoneVMs is part of cleanUpOldVMs, operating on a single zone.
2015-01-02 18:16:44 -07:00
func cleanZoneVMs ( zone string ) error {
2014-12-30 17:19:06 -07:00
// Fetch the first 500 (default) running instances and clean
// thoes. We expect that we'll be running many fewer than
// that. Even if we have more, eventually the first 500 will
// either end or be cleaned, and then the next call will get a
// partially-different 500.
// TODO(bradfitz): revist this code if we ever start running
// thousands of VMs.
2015-01-02 18:16:44 -07:00
list , err := computeService . Instances . List ( projectID , zone ) . Do ( )
2014-12-30 17:19:06 -07:00
if err != nil {
return fmt . Errorf ( "listing instances: %v" , err )
}
for _ , inst := range list . Items {
2015-01-07 16:44:25 -07:00
if ! strings . HasPrefix ( inst . Name , "buildlet-" ) {
// We only delete ones we created.
continue
}
2014-12-30 17:19:06 -07:00
if inst . Metadata == nil {
// Defensive. Not seen in practice.
continue
}
2015-01-07 16:44:25 -07:00
sawDeleteAt := false
2014-12-30 17:19:06 -07:00
for _ , it := range inst . Metadata . Items {
if it . Key == "delete-at" {
2015-01-07 16:44:25 -07:00
sawDeleteAt = true
2014-12-30 17:19:06 -07:00
unixDeadline , err := strconv . ParseInt ( it . Value , 10 , 64 )
if err != nil {
log . Printf ( "invalid delete-at value %q seen; ignoring" , it . Value )
}
if err == nil && time . Now ( ) . Unix ( ) > unixDeadline {
log . Printf ( "Deleting expired VM %q in zone %q ..." , inst . Name , zone )
2015-01-02 18:16:44 -07:00
deleteVM ( zone , inst . Name )
2014-12-30 17:19:06 -07:00
}
}
}
2015-01-07 16:44:25 -07:00
if sawDeleteAt && ! vmIsBuilding ( inst . Name ) {
log . Printf ( "Deleting VM %q in zone %q from an earlier coordinator generation ..." , inst . Name , zone )
deleteVM ( zone , inst . Name )
}
2014-12-30 17:19:06 -07:00
}
return nil
}
2015-01-02 18:16:44 -07:00
func deleteVM ( zone , instName string ) {
op , err := computeService . Instances . Delete ( projectID , zone , instName ) . Do ( )
2014-12-30 17:19:06 -07:00
if err != nil {
log . Printf ( "Failed to delete instance %q in zone %q: %v" , instName , zone , err )
return
}
log . Printf ( "Sent request to delete instance %q in zone %q. Operation ID == %v" , instName , zone , op . Id )
}
// hasComputeScope reports whether this process is running on GCE and
// the "default" service account has been granted the Compute Engine
// scope. It returns false if the scopes can't be queried.
func hasComputeScope() bool {
	if !metadata.OnGCE() {
		return false
	}
	scopes, err := metadata.Scopes("default")
	if err != nil {
		log.Printf("failed to query metadata default scopes: %v", err)
		return false
	}
	for _, v := range scopes {
		// Check the Compute Engine scope itself. The Cloud Storage
		// full-control scope says nothing about whether instances
		// can be managed.
		if v == compute.ComputeScope {
			return true
		}
	}
	return false
}
2015-01-02 18:16:44 -07:00
// dash is copied from the builder binary. It performs the HTTP method
// meth for the command cmd against the dashboard at build.golang.org.
//
// TODO(bradfitz,adg): unify this somewhere?
//
// args, if non-nil, becomes the URL query string; a "version" entry is
// always added, so that key is reserved and passing it panics.
// req, if non-nil, is JSON-encoded and sent as the POST body; it must
// be nil for GET.
// resp, if non-nil, must be a pointer; the "Response" field of the
// server's JSON reply is decoded into it.
//
// A non-empty "Error" field in the reply is returned as an error. An
// unsupported method panics.
func dash(meth, cmd string, args url.Values, req, resp interface{}) error {
	const builderVersion = 1 // keep in sync with dashboard/app/build/handler.go

	if _, reserved := args["version"]; reserved {
		panic(` dash: reserved args key: "version" `)
	}
	query := url.Values{"version": {fmt.Sprint(builderVersion)}}
	for key, vals := range args {
		query[key] = vals
	}
	cmd = "https://build.golang.org/" + cmd + "?" + query.Encode()

	var (
		r   *http.Response
		err error
	)
	switch meth {
	case "GET":
		if req != nil {
			log.Panicf("%s to %s with req", meth, cmd)
		}
		r, err = http.Get(cmd)
	case "POST":
		var payload io.Reader
		if req != nil {
			encoded, encErr := json.Marshal(req)
			if encErr != nil {
				return encErr
			}
			payload = bytes.NewBuffer(encoded)
		}
		r, err = http.Post(cmd, "text/json", payload)
	default:
		log.Panicf("%s: invalid method %q", cmd, meth)
	}
	if err != nil {
		return err
	}
	defer r.Body.Close()
	if r.StatusCode != http.StatusOK {
		return fmt.Errorf("bad http response: %v", r.Status)
	}

	raw := new(bytes.Buffer)
	if _, err := raw.ReadFrom(r.Body); err != nil {
		return err
	}

	// The reply is an envelope of {Response, Error}. Seeding Response
	// with the caller's pointer makes the decoder fill the caller's
	// value in place.
	envelope := struct {
		Response interface{}
		Error    string
	}{Response: resp}
	if err := json.Unmarshal(raw.Bytes(), &envelope); err != nil {
		log.Printf("json unmarshal %#q: %s\n", raw.Bytes(), err)
		return err
	}
	if envelope.Error != "" {
		return errors.New(envelope.Error)
	}
	return nil
}
func versionTgz ( rev string ) io . Reader {
var buf bytes . Buffer
zw := gzip . NewWriter ( & buf )
tw := tar . NewWriter ( zw )
contents := fmt . Sprintf ( "devel " + rev )
check ( tw . WriteHeader ( & tar . Header {
Name : "VERSION" ,
Mode : 0644 ,
Size : int64 ( len ( contents ) ) ,
} ) )
_ , err := io . WriteString ( tw , contents )
check ( err )
check ( tw . Close ( ) )
check ( zw . Close ( ) )
return bytes . NewReader ( buf . Bytes ( ) )
}
// check panics if err is non-nil. Use it only for operations whose
// failure should be impossible, not merely rare.
func check(err error) {
	if err == nil {
		return
	}
	panic("previously assumed to never fail: " + err.Error())
}