1
0
mirror of https://github.com/golang/go synced 2024-11-18 17:54:57 -07:00

dashboard/coordinator: use VMs to run non-Linux builders

Currently this is running an OpenBSD amd64 builder, but other operating
systems are now easy.

Updates golang/go#8642
Updates golang/go#9492

Change-Id: I4e1026c49ee4d253dfcf78c7606767294784f02e
Reviewed-on: https://go-review.googlesource.com/2264
Reviewed-by: Andrew Gerrand <adg@golang.org>
This commit is contained in:
Brad Fitzpatrick 2015-01-02 17:16:44 -08:00
parent 026c0f4d1d
commit 58c8b8a738

View File

@ -8,10 +8,13 @@
package main // import "golang.org/x/tools/dashboard/coordinator"
import (
"archive/tar"
"bytes"
"compress/gzip"
"crypto/hmac"
"crypto/md5"
"encoding/json"
"errors"
"flag"
"fmt"
"html"
@ -19,8 +22,11 @@ import (
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"os/exec"
"path"
"regexp"
"sort"
"strconv"
"strings"
@ -34,8 +40,8 @@ import (
)
var (
masterKeyFile = flag.String("masterkey", "", "Path to builder master key. Else fetched using GCE project attribute 'builder-master-key'.")
maxBuilds = flag.Int("maxbuilds", 6, "Max concurrent builds")
masterKeyFile = flag.String("masterkey", "", "Path to builder master key. Else fetched using GCE project attribute 'builder-master-key'.")
maxLocalBuilds = flag.Int("maxbuilds", 6, "Max concurrent Docker builds (VM builds don't count)")
cleanZones = flag.String("zones", "us-central1-a,us-central1-b,us-central1-f", "Comma-separated list of zones to periodically clean of stale build VMs (ones that failed to shut themselves down)")
@ -47,8 +53,8 @@ var (
var (
startTime = time.Now()
builders = map[string]buildConfig{} // populated once at startup
watchers = map[string]watchConfig{} // populated once at startup
builders = map[string]buildConfig{} // populated at startup, keys like "openbsd-amd64-56"
watchers = map[string]watchConfig{} // populated at startup, keyed by repo, e.g. "https://go.googlesource.com/go"
donec = make(chan builderRev) // reports of finished builders
statusMu sync.Mutex // guards both status (ongoing ones) and statusDone (just finished)
@ -58,6 +64,37 @@ var (
const maxStatusDone = 30
// Initialized by initGCE:
var (
projectID string
projectZone string
computeService *compute.Service
)
func initGCE() error {
if !metadata.OnGCE() {
return errors.New("not running on GCE; VM support disabled")
}
var err error
projectID, err = metadata.ProjectID()
if err != nil {
return fmt.Errorf("failed to get current GCE ProjectID: %v", err)
}
projectZone, err = metadata.Get("instance/zone")
if err != nil || projectZone == "" {
return fmt.Errorf("failed to get current GCE zone: %v", err)
}
// Convert the zone from "projects/1234/zones/us-central1-a" to "us-central1-a".
projectZone = path.Base(projectZone)
if !hasComputeScope() {
return errors.New("The coordinator is not running with access to read and write Compute resources. VM support disabled.")
}
ts := google.ComputeTokenSource("default")
computeService, _ = compute.New(oauth2.NewClient(oauth2.NoContext, ts))
return nil
}
type imageInfo struct {
url string // of tar file
@ -74,8 +111,15 @@ var images = map[string]*imageInfo{
"gobuilders/linux-x86-sid": {url: "https://storage.googleapis.com/go-builder-data/docker-linux.sid.tar.gz"},
}
// A buildConfig describes how to run either a Docker-based or VM-based build.
type buildConfig struct {
name string // "linux-amd64-race"
name string // "linux-amd64-race"
// VM-specific settings: (used if vmImage != "")
vmImage string // e.g. "openbsd-amd64-56"
machineType string // optional GCE instance type
// Docker-specific settings: (used if vmImage == "")
image string // Docker image to use to build
cmd string // optional -cmd flag (relative to go/src/)
env []string // extra environment ("key=value") pairs
@ -83,6 +127,31 @@ type buildConfig struct {
tool string // the tool this configuration is for
}
func (c *buildConfig) usesDocker() bool { return c.vmImage == "" }
func (c *buildConfig) usesVM() bool { return c.vmImage != "" }
func (c *buildConfig) MachineType() string {
if v := c.machineType; v != "" {
return v
}
return "n1-highcpu-4"
}
// recordResult sends build results to the dashboard
func (b *buildConfig) recordResult(ok bool, hash, buildLog string, runTime time.Duration) error {
req := map[string]interface{}{
"Builder": b.name,
"PackagePath": "",
"Hash": hash,
"GoHash": "",
"OK": ok,
"Log": buildLog,
"RunTime": runTime,
}
args := url.Values{"key": {builderKey(b.name)}, "builder": {b.name}}
return dash("POST", "result", args, req, nil)
}
type watchConfig struct {
repo string // "https://go.googlesource.com/go"
dash string // "https://build.golang.org/" (must end in /)
@ -91,6 +160,11 @@ type watchConfig struct {
func main() {
flag.Parse()
if err := initGCE(); err != nil {
log.Printf("VM support disabled due to error initializing GCE: %v", err)
}
addBuilder(buildConfig{name: "linux-386"})
addBuilder(buildConfig{name: "linux-386-387", env: []string{"GO386=387"}})
addBuilder(buildConfig{name: "linux-amd64"})
@ -118,6 +192,10 @@ func main() {
addBuilder(buildConfig{name: "linux-386-clang", image: "gobuilders/linux-x86-clang"})
addBuilder(buildConfig{name: "linux-amd64-clang", image: "gobuilders/linux-x86-clang"})
// VMs:
// addBuilder(buildConfig{name: "openbsd-amd64-gce56", vmImage: "openbsd-amd64-56"})
// addBuilder(buildConfig{name: "plan9-386-gce", vmImage: "plan9-386"})
addWatcher(watchConfig{repo: "https://go.googlesource.com/go", dash: "https://build.golang.org/"})
// TODO(adg,cmang): fix gccgo watcher
// addWatcher(watchConfig{repo: "https://code.google.com/p/gofrontend", dash: "https://build.golang.org/gccgo/"})
@ -146,6 +224,7 @@ func main() {
go cleanUpOldContainers()
go cleanUpOldVMs()
stopWatchers() // clean up before we start new ones
for _, watcher := range watchers {
if err := startWatching(watchers[watcher.repo]); err != nil {
log.Printf("Error starting watcher for %s: %v", watcher.repo, err)
@ -161,17 +240,11 @@ func main() {
for {
select {
case work := <-workc:
log.Printf("workc received %+v; len(status) = %v, maxBuilds = %v; cur = %p", work, len(status), *maxBuilds, status[work])
mayBuild := mayBuildRev(work)
if mayBuild {
if numBuilds() > *maxBuilds {
mayBuild = false
}
}
if mayBuild {
if st, err := startBuilding(builders[work.name], work.rev); err == nil {
log.Printf("workc received %+v; len(status) = %v, maxLocalBuilds = %v; cur = %p", work, len(status), *maxLocalBuilds, status[work])
if mayBuildRev(work) {
conf := builders[work.name]
if st, err := startBuilding(conf, work.rev); err == nil {
setStatus(work, st)
log.Printf("%v now building in %v", work, st.container)
} else {
log.Printf("Error starting to build %v: %v", work, err)
}
@ -193,10 +266,28 @@ func numCurrentBuilds() int {
return len(status)
}
// mayBuildRev reports whether the build type & revision should be started.
// It returns true if it's not already building, and there is capacity.
func mayBuildRev(work builderRev) bool {
conf := builders[work.name]
statusMu.Lock()
defer statusMu.Unlock()
return len(status) < *maxBuilds && status[work] == nil
_, building := status[work]
statusMu.Unlock()
if building {
return false
}
if conf.usesVM() {
// These don't count towards *maxLocalBuilds.
return true
}
numDocker, err := numDockerBuilds()
if err != nil {
log.Printf("not starting %v due to docker ps failure: %v", work, err)
return false
}
return numDocker < *maxLocalBuilds
}
func setStatus(work builderRev, st *buildStatus) {
@ -220,14 +311,20 @@ func markDone(work builderRev) {
statusDone = append(statusDone, st)
}
func getStatus(work builderRev) *buildStatus {
// statusPtrStr disambiguates which status to return if there are
// multiple in the history (e.g. recent failures where the build
// didn't finish for reasons outside of all.bash failing)
func getStatus(work builderRev, statusPtrStr string) *buildStatus {
statusMu.Lock()
defer statusMu.Unlock()
if st, ok := status[work]; ok {
match := func(st *buildStatus) bool {
return statusPtrStr == "" || fmt.Sprintf("%p", st) == statusPtrStr
}
if st, ok := status[work]; ok && match(st) {
return st
}
for _, st := range statusDone {
if st.builderRev == work {
if st.builderRev == work && match(st) {
return st
}
}
@ -248,14 +345,23 @@ func handleStatus(w http.ResponseWriter, r *http.Request) {
active = append(active, st)
}
recent = append(recent, statusDone...)
numTotal := len(status)
numDocker, err := numDockerBuilds()
statusMu.Unlock()
sort.Sort(byAge(active))
sort.Sort(byAge(recent))
sort.Sort(sort.Reverse(byAge(recent)))
io.WriteString(w, "<html><body><h1>Go build coordinator</h1>")
fmt.Fprintf(w, "<h2>running</h2>%d of max %d builds running:<p><pre>", len(status), *maxBuilds)
if err != nil {
fmt.Fprintf(w, "<h2>Error</h2>Error fetching Docker build count: <i>%s</i>\n", html.EscapeString(err.Error()))
}
fmt.Fprintf(w, "<h2>running</h2><p>%d total builds active (Docker: %d/%d; VMs: %d/∞):",
numTotal, numDocker, *maxLocalBuilds, numTotal-numDocker)
io.WriteString(w, "<pre>")
for _, st := range active {
io.WriteString(w, st.htmlStatusLine())
}
@ -276,7 +382,7 @@ func diskFree() string {
}
func handleLogs(w http.ResponseWriter, r *http.Request) {
st := getStatus(builderRev{r.FormValue("name"), r.FormValue("rev")})
st := getStatus(builderRev{r.FormValue("name"), r.FormValue("rev")}, r.FormValue("st"))
if st == nil {
http.NotFound(w, r)
return
@ -367,6 +473,10 @@ func (conf buildConfig) dockerRunArgs(rev string) (args []string) {
}
func addBuilder(c buildConfig) {
if c.tool == "gccgo" {
// TODO(cmang,bradfitz,adg): fix gccgo
return
}
if c.name == "" {
panic("empty name")
}
@ -394,8 +504,11 @@ func addBuilder(c buildConfig) {
if strings.HasPrefix(c.name, "linux-") && c.image == "" {
c.image = "gobuilders/linux-x86-base"
}
if c.image == "" {
panic("empty image")
if c.image == "" && c.vmImage == "" {
panic("empty image and vmImage")
}
if c.image != "" && c.vmImage != "" {
panic("can't specify both image and vmImage")
}
builders[c.name] = c
}
@ -478,21 +591,29 @@ func condUpdateImage(img string) error {
return nil
}
// numBuilds finds the number of go builder instances currently running.
func numBuilds() int {
out, _ := exec.Command("docker", "ps").Output()
numBuilds := 0
ps := bytes.Split(out, []byte("\n"))
for _, p := range ps {
if bytes.HasPrefix(p, []byte("gobuilders/")) {
numBuilds++
// numDockerBuilds finds the number of go builder instances currently running.
func numDockerBuilds() (n int, err error) {
out, err := exec.Command("docker", "ps").Output()
if err != nil {
return 0, err
}
for _, line := range strings.Split(string(out), "\n") {
if strings.Contains(line, "gobuilders/") {
n++
}
}
log.Printf("num current docker builds: %d", numBuilds)
return numBuilds
return n, nil
}
func startBuilding(conf buildConfig, rev string) (*buildStatus, error) {
if conf.usesVM() {
return startBuildingInVM(conf, rev)
} else {
return startBuildingInDocker(conf, rev)
}
}
func startBuildingInDocker(conf buildConfig, rev string) (*buildStatus, error) {
if err := condUpdateImage(conf.image); err != nil {
log.Printf("Failed to setup container for %v %v: %v", conf.name, rev, err)
return nil, err
@ -505,14 +626,16 @@ func startBuilding(conf buildConfig, rev string) (*buildStatus, error) {
return nil, err
}
container := strings.TrimSpace(string(all))
bs := &buildStatus{
builderRev: builderRev{
name: conf.name,
rev: rev,
},
container: container,
start: time.Now(),
brev := builderRev{
name: conf.name,
rev: rev,
}
st := &buildStatus{
builderRev: brev,
container: container,
start: time.Now(),
}
log.Printf("%v now building in Docker container %v", brev, st.container)
go func() {
all, err := exec.Command("docker", "wait", container).CombinedOutput()
output := strings.TrimSpace(string(all))
@ -521,15 +644,15 @@ func startBuilding(conf buildConfig, rev string) (*buildStatus, error) {
exit, err := strconv.Atoi(output)
ok = (err == nil && exit == 0)
}
bs.setDone(ok)
st.setDone(ok)
log.Printf("docker wait %s/%s: %v, %s", container, rev, err, output)
donec <- builderRev{conf.name, rev}
exec.Command("docker", "rm", container).Run()
}()
go func() {
cmd := exec.Command("docker", "logs", "-f", container)
cmd.Stdout = bs
cmd.Stderr = bs
cmd.Stdout = st
cmd.Stderr = st
if err := cmd.Run(); err != nil {
// The docker logs subcommand always returns
// success, even if the underlying process
@ -537,7 +660,273 @@ func startBuilding(conf buildConfig, rev string) (*buildStatus, error) {
log.Printf("failed to follow docker logs of %s: %v", container, err)
}
}()
return bs, nil
return st, nil
}
var osArchRx = regexp.MustCompile(`^(\w+-\w+)`)
// startBuildingInVM starts a VM on GCE running the buildlet binary to build rev.
func startBuildingInVM(conf buildConfig, rev string) (*buildStatus, error) {
brev := builderRev{
name: conf.name,
rev: rev,
}
st := &buildStatus{
builderRev: brev,
start: time.Now(),
}
// name is the project-wide unique name of the GCE instance. It can't be longer
// than 61 bytes, so we only use the first 8 bytes of the rev.
name := "buildlet-" + conf.name + "-" + rev[:8]
// buildletURL is the URL of the buildlet binary which the VMs
// are configured to download at boot and run. This lets us
// update the buildlet more easily than rebuilding the whole
// VM image. We put this URL in a well-known GCE metadata attribute.
// The value will be of the form:
// http://storage.googleapis.com/go-builder-data/buildlet.GOOS-GOARCH
m := osArchRx.FindStringSubmatch(conf.name)
if m == nil {
return nil, fmt.Errorf("invalid builder name %q", conf.name)
}
buildletURL := "http://storage.googleapis.com/go-builder-data/buildlet." + m[1]
prefix := "https://www.googleapis.com/compute/v1/projects/" + projectID
machType := prefix + "/zones/" + projectZone + "/machineTypes/" + conf.MachineType()
instance := &compute.Instance{
Name: name,
Description: fmt.Sprintf("Go Builder building %s %s", conf.name, rev),
MachineType: machType,
Disks: []*compute.AttachedDisk{
{
AutoDelete: true,
Boot: true,
Type: "PERSISTENT",
InitializeParams: &compute.AttachedDiskInitializeParams{
DiskName: name,
SourceImage: "https://www.googleapis.com/compute/v1/projects/" + projectID + "/global/images/" + conf.vmImage,
DiskType: "https://www.googleapis.com/compute/v1/projects/" + projectID + "/zones/" + projectZone + "/diskTypes/pd-ssd",
},
},
},
Tags: &compute.Tags{
// Warning: do NOT list "http-server" or "allow-ssh" (our
// project's custom tag to allow ssh access) here; the
// buildlet provides full remote code execution.
Items: []string{},
},
Metadata: &compute.Metadata{
Items: []*compute.MetadataItems{
{
Key: "buildlet-binary-url",
Value: buildletURL,
},
// In case the VM gets away from us (generally: if the
// coordinator dies while a build is running), then we
// set this attribute of when it should be killed so
// we can kill it later when the coordinator is
// restarted. The cleanUpOldVMs goroutine loop handles
// that killing.
{
Key: "delete-at",
Value: fmt.Sprint(time.Now().Add(30 * time.Minute).Unix()),
},
},
},
NetworkInterfaces: []*compute.NetworkInterface{
&compute.NetworkInterface{
AccessConfigs: []*compute.AccessConfig{
&compute.AccessConfig{
Type: "ONE_TO_ONE_NAT",
Name: "External NAT",
},
},
Network: prefix + "/global/networks/default",
},
},
}
op, err := computeService.Instances.Insert(projectID, projectZone, instance).Do()
if err != nil {
return nil, fmt.Errorf("Failed to create instance: %v", err)
}
st.createOp = op.Name
st.instName = name
log.Printf("%v now building in VM %v", brev, st.instName)
// Start the goroutine to monitor the VM now that it's booting. This might
// take minutes for it to come up, and then even more time to do the build.
go func() {
err := watchVM(st)
deleteVM(projectZone, st.instName)
st.setDone(err == nil)
if err != nil {
fmt.Fprintf(st, "\n\nError: %v\n", err)
}
donec <- builderRev{conf.name, rev}
}()
return st, nil
}
// watchVM monitors a VM doing a build.
func watchVM(st *buildStatus) (err error) {
goodRes := func(res *http.Response, err error, what string) bool {
if err != nil {
err = fmt.Errorf("%s: %v", what, err)
return false
}
if res.StatusCode/100 != 2 {
err = fmt.Errorf("%s: %v", what, res.Status)
return false
}
return true
}
st.logEventTime("instance_create_requested")
// Wait for instance create operation to succeed.
OpLoop:
for {
time.Sleep(2 * time.Second)
op, err := computeService.ZoneOperations.Get(projectID, projectZone, st.createOp).Do()
if err != nil {
return fmt.Errorf("Failed to get op %s: %v", st.createOp, err)
}
switch op.Status {
case "PENDING", "RUNNING":
continue
case "DONE":
if op.Error != nil {
for _, operr := range op.Error.Errors {
return fmt.Errorf("Error creating instance: %+v", operr)
}
return errors.New("Failed to start.")
}
break OpLoop
default:
log.Fatalf("Unknown status %q: %+v", op.Status, op)
}
}
st.logEventTime("instance_created")
inst, err := computeService.Instances.Get(projectID, projectZone, st.instName).Do()
if err != nil {
return fmt.Errorf("Error getting instance %s details after creation: %v", st.instName, err)
}
st.logEventTime("got_instance_info")
// Find its internal IP.
var ip string
for _, iface := range inst.NetworkInterfaces {
if strings.HasPrefix(iface.NetworkIP, "10.") {
ip = iface.NetworkIP
}
}
if ip == "" {
return errors.New("didn't find its internal IP address")
}
// Wait for it to boot and its buildlet to come up on port 80.
st.logEventTime("waiting_for_buildlet")
buildletURL := "http://" + ip
const numTries = 60
var alive bool
for i := 1; i <= numTries; i++ {
res, err := http.Get(buildletURL)
if err != nil {
time.Sleep(1 * time.Second)
continue
}
res.Body.Close()
if res.StatusCode != 200 {
return fmt.Errorf("buildlet returned HTTP status code %d on try number %d", res.StatusCode, i)
}
st.logEventTime("buildlet_up")
alive = true
break
}
if !alive {
return fmt.Errorf("buildlet didn't come up in %d seconds", numTries)
}
// Write the VERSION file.
st.logEventTime("start_write_version_tar")
verReq, err := http.NewRequest("PUT", buildletURL+"/writetgz", versionTgz(st.rev))
if err != nil {
return err
}
verRes, err := http.DefaultClient.Do(verReq)
if !goodRes(verRes, err, "writing VERSION tgz") {
return
}
// Feed the buildlet a tar file for it to extract.
// TODO: cache these.
st.logEventTime("start_fetch_gerrit_tgz")
tarRes, err := http.Get("https://go.googlesource.com/go/+archive/" + st.rev + ".tar.gz")
if !goodRes(tarRes, err, "fetching tarball from Gerrit") {
return
}
st.logEventTime("start_write_tar")
putReq, err := http.NewRequest("PUT", buildletURL+"/writetgz", tarRes.Body)
if err != nil {
tarRes.Body.Close()
return err
}
putRes, err := http.DefaultClient.Do(putReq)
st.logEventTime("end_write_tar")
tarRes.Body.Close()
if !goodRes(putRes, err, "writing tarball to buildlet") {
return
}
// Run the builder
cmd := "all.bash"
if strings.HasPrefix(st.name, "windows-") {
cmd = "all.bat"
} else if strings.HasPrefix(st.name, "plan9-") {
cmd = "all.rc"
}
execStartTime := time.Now()
st.logEventTime("start_exec")
res, err := http.PostForm(buildletURL+"/exec", url.Values{"cmd": {"src/" + cmd}})
if !goodRes(res, err, "running "+cmd) {
return
}
defer res.Body.Close()
st.logEventTime("running_exec")
// Stream the output:
if _, err := io.Copy(st, res.Body); err != nil {
return fmt.Errorf("error copying response: %v", err)
}
st.logEventTime("done")
state := res.Trailer.Get("Process-State")
// Don't record to the dashboard unless we heard the trailer from
// the buildlet, otherwise it was probably some unrelated error
// (like the VM being killed, or the buildlet crashing due to
// e.g. https://golang.org/issue/9309, since we require a tip
// build of the buildlet to get Trailers support)
if state != "" {
conf := builders[st.name]
var log string
if state != "ok" {
log = st.logs()
}
if err := conf.recordResult(state == "ok", st.rev, log, time.Since(execStartTime)); err != nil {
return fmt.Errorf("Status was %q but failed to report it to the dashboard: %v", state, err)
}
}
if state != "ok" {
return fmt.Errorf("got Trailer process state %q", state)
}
return nil
}
type eventAndTime struct {
evt string
t time.Time
}
// buildStatus is the status of a build.
@ -547,10 +936,15 @@ type buildStatus struct {
start time.Time
container string // container ID for docker, else it's a VM
// Immutable, used by VM only:
createOp string // Instances.Insert operation name
instName string
mu sync.Mutex // guards following
done time.Time // finished running
succeeded bool // set when done
output bytes.Buffer // stdout and stderr
events []eventAndTime
}
func (st *buildStatus) setDone(succeeded bool) {
@ -560,6 +954,12 @@ func (st *buildStatus) setDone(succeeded bool) {
st.done = time.Now()
}
func (st *buildStatus) logEventTime(event string) {
st.mu.Lock()
defer st.mu.Unlock()
st.events = append(st.events, eventAndTime{event, time.Now()})
}
// htmlStatusLine returns the HTML to show within the <pre> block on
// the main page's list of active builds.
func (st *buildStatus) htmlStatusLine() string {
@ -583,8 +983,11 @@ func (st *buildStatus) htmlStatusLine() string {
buf.WriteString(", failed")
}
logsURL := fmt.Sprintf("/logs?name=%s&rev=%s&st=%p", st.name, st.rev, st)
if st.container != "" {
fmt.Fprintf(&buf, " in container <a href='/logs?name=%s&rev=%s'>%s</a>", st.name, st.rev, st.container)
fmt.Fprintf(&buf, " in container <a href='%s'>%s</a>", logsURL, st.container)
} else {
fmt.Fprintf(&buf, " in VM <a href='%s'>%s</a>", logsURL, st.instName)
}
t := st.done
@ -592,6 +995,17 @@ func (st *buildStatus) htmlStatusLine() string {
t = st.start
}
fmt.Fprintf(&buf, ", %v ago\n", time.Since(t))
for i, evt := range st.events {
var elapsed string
if i != 0 {
elapsed = fmt.Sprintf("+%0.1fs", evt.t.Sub(st.events[i-1].t).Seconds())
}
msg := evt.evt
if msg == "running_exec" {
msg = fmt.Sprintf("<a href='%s'>%s</a>", logsURL, msg)
}
fmt.Fprintf(&buf, " %7s %v %s\n", elapsed, evt.t.Format(time.RFC3339), msg)
}
return buf.String()
}
@ -607,10 +1021,28 @@ func (st *buildStatus) Write(p []byte) (n int, err error) {
st.mu.Lock()
defer st.mu.Unlock()
const maxBufferSize = 2 << 20 // 2MB of output is way more than we expect.
plen := len(p)
if st.output.Len()+len(p) > maxBufferSize {
p = p[:maxBufferSize-st.output.Len()]
}
return st.output.Write(p)
st.output.Write(p) // bytes.Buffer can't fail
return plen, nil
}
// Stop any previous go-commit-watcher Docker tasks, so they don't
// pile up upon restarts of the coordinator.
func stopWatchers() {
out, err := exec.Command("docker", "ps").Output()
if err != nil {
return
}
for _, line := range strings.Split(string(out), "\n") {
if !strings.Contains(line, "go-commit-watcher:") {
continue
}
f := strings.Fields(line)
exec.Command("docker", "rm", "-f", "-v", f[0]).Run()
}
}
func startWatching(conf watchConfig) (err error) {
@ -723,16 +1155,13 @@ func oldContainers() []string {
// "delete-at" metadata attribute on them when created to some time
// that's well beyond their expected lifetime.
func cleanUpOldVMs() {
if !hasComputeScope() {
log.Printf("The coordinator is not running with access to read and write Compute resources. Background VM cleaning disabled.")
if computeService == nil {
return
}
ts := google.ComputeTokenSource("default")
computeService, _ := compute.New(oauth2.NewClient(oauth2.NoContext, ts))
for {
for _, zone := range strings.Split(*cleanZones, ",") {
zone = strings.TrimSpace(zone)
if err := cleanZoneVMs(computeService, zone); err != nil {
if err := cleanZoneVMs(zone); err != nil {
log.Printf("Error cleaning VMs in zone %q: %v", zone, err)
}
}
@ -741,11 +1170,7 @@ func cleanUpOldVMs() {
}
// cleanZoneVMs is part of cleanUpOldVMs, operating on a single zone.
func cleanZoneVMs(svc *compute.Service, zone string) error {
proj, err := metadata.ProjectID()
if err != nil {
return fmt.Errorf("failed to get current GCE ProjectID: %v", err)
}
func cleanZoneVMs(zone string) error {
// Fetch the first 500 (default) running instances and clean
// thoes. We expect that we'll be running many fewer than
// that. Even if we have more, eventually the first 500 will
@ -753,7 +1178,7 @@ func cleanZoneVMs(svc *compute.Service, zone string) error {
// partially-different 500.
// TODO(bradfitz): revist this code if we ever start running
// thousands of VMs.
list, err := svc.Instances.List(proj, zone).Do()
list, err := computeService.Instances.List(projectID, zone).Do()
if err != nil {
return fmt.Errorf("listing instances: %v", err)
}
@ -770,7 +1195,7 @@ func cleanZoneVMs(svc *compute.Service, zone string) error {
}
if err == nil && time.Now().Unix() > unixDeadline {
log.Printf("Deleting expired VM %q in zone %q ...", inst.Name, zone)
deleteVM(svc, zone, inst.Name)
deleteVM(zone, inst.Name)
}
}
}
@ -778,13 +1203,8 @@ func cleanZoneVMs(svc *compute.Service, zone string) error {
return nil
}
func deleteVM(svc *compute.Service, zone, instName string) {
proj, err := metadata.ProjectID()
if err != nil {
log.Printf("failed to get project id to delete instace: %v", err)
return
}
op, err := svc.Instances.Delete(proj, zone, instName).Do()
func deleteVM(zone, instName string) {
op, err := computeService.Instances.Delete(projectID, zone, instName).Do()
if err != nil {
log.Printf("Failed to delete instance %q in zone %q: %v", instName, zone, err)
return
@ -808,3 +1228,102 @@ func hasComputeScope() bool {
}
return false
}
// dash is copied from the builder binary. It runs the given method and command on the dashboard.
//
// TODO(bradfitz,adg): unify this somewhere?
//
// If args is non-nil it is encoded as the URL query string.
// If req is non-nil it is JSON-encoded and passed as the body of the HTTP POST.
// If resp is non-nil the server's response is decoded into the value pointed
// to by resp (resp must be a pointer).
func dash(meth, cmd string, args url.Values, req, resp interface{}) error {
const builderVersion = 1 // keep in sync with dashboard/app/build/handler.go
argsCopy := url.Values{"version": {fmt.Sprint(builderVersion)}}
for k, v := range args {
if k == "version" {
panic(`dash: reserved args key: "version"`)
}
argsCopy[k] = v
}
var r *http.Response
var err error
cmd = "https://build.golang.org/" + cmd + "?" + argsCopy.Encode()
switch meth {
case "GET":
if req != nil {
log.Panicf("%s to %s with req", meth, cmd)
}
r, err = http.Get(cmd)
case "POST":
var body io.Reader
if req != nil {
b, err := json.Marshal(req)
if err != nil {
return err
}
body = bytes.NewBuffer(b)
}
r, err = http.Post(cmd, "text/json", body)
default:
log.Panicf("%s: invalid method %q", cmd, meth)
panic("invalid method: " + meth)
}
if err != nil {
return err
}
defer r.Body.Close()
if r.StatusCode != http.StatusOK {
return fmt.Errorf("bad http response: %v", r.Status)
}
body := new(bytes.Buffer)
if _, err := body.ReadFrom(r.Body); err != nil {
return err
}
// Read JSON-encoded Response into provided resp
// and return an error if present.
var result = struct {
Response interface{}
Error string
}{
// Put the provided resp in here as it can be a pointer to
// some value we should unmarshal into.
Response: resp,
}
if err = json.Unmarshal(body.Bytes(), &result); err != nil {
log.Printf("json unmarshal %#q: %s\n", body.Bytes(), err)
return err
}
if result.Error != "" {
return errors.New(result.Error)
}
return nil
}
func versionTgz(rev string) io.Reader {
var buf bytes.Buffer
zw := gzip.NewWriter(&buf)
tw := tar.NewWriter(zw)
contents := fmt.Sprintf("devel " + rev)
check(tw.WriteHeader(&tar.Header{
Name: "VERSION",
Mode: 0644,
Size: int64(len(contents)),
}))
_, err := io.WriteString(tw, contents)
check(err)
check(tw.Close())
check(zw.Close())
return bytes.NewReader(buf.Bytes())
}
// check is only for things which should be impossible (not even rare)
// to fail.
func check(err error) {
if err != nil {
panic("previously assumed to never fail: " + err.Error())
}
}