dashboard/coordinator: clean up VMs more, fix watchVM bug, tweak plan 9 settings
Change-Id: I30609505cd3790f9e9505a4f020215de7b0ab74b
Reviewed-on: https://go-review.googlesource.com/2474
Reviewed-by: Andrew Gerrand <adg@golang.org>
parent af7d92a90f
commit 2cd6e42cb2
@@ -13,6 +13,7 @@ import (
 	"compress/gzip"
 	"crypto/hmac"
 	"crypto/md5"
+	"crypto/rand"
 	"encoding/json"
 	"errors"
 	"flag"
@@ -63,7 +64,16 @@ var (
 	statusDone []*buildStatus // finished recently, capped to maxStatusDone
 )
 
-const maxStatusDone = 30
+const (
+	maxStatusDone = 30
+
+	// vmDeleteTimeout is how long before we delete a VM.
+	// In practice this need only be as long as the slowest
+	// builder (plan9 currently), because on startup this program
+	// already deletes all buildlets it doesn't know about
+	// (i.e. ones from a previous instance of the coordinator).
+	vmDeleteTimeout = 45 * time.Minute
+)
 
 // Initialized by initGCE:
 var (
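Aside: vmDeleteTimeout feeds the delete-at instance metadata set in startBuildingInVM (see the hunk at -776 below) and is honored by the cleanup pass in cleanZoneVMs. A minimal standalone sketch of that lifecycle, with hypothetical helper names deadlineFor and expired (not the coordinator's actual code):

package main

import (
	"fmt"
	"strconv"
	"time"
)

const vmDeleteTimeout = 45 * time.Minute

// deadlineFor renders the delete-at metadata value the way the
// coordinator does: seconds since the Unix epoch, as a string.
func deadlineFor(now time.Time) string {
	return fmt.Sprint(now.Add(vmDeleteTimeout).Unix())
}

// expired parses a stored deadline and reports whether it has passed.
func expired(value string, now time.Time) bool {
	unixDeadline, err := strconv.ParseInt(value, 10, 64)
	if err != nil {
		return false // invalid values are logged and ignored, not deleted
	}
	return now.Unix() > unixDeadline
}

func main() {
	created := time.Now()
	v := deadlineFor(created)
	fmt.Println(expired(v, created))                     // false: just created
	fmt.Println(expired(v, created.Add(46*time.Minute))) // true: past the 45m window
}

The cleanup side logs and skips unparsable delete-at values rather than deleting the VM, matching cleanZoneVMs below.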
@@ -194,8 +204,38 @@ func main() {
 	addBuilder(buildConfig{name: "linux-amd64-clang", image: "gobuilders/linux-x86-clang"})
 
 	// VMs:
-	addBuilder(buildConfig{name: "openbsd-amd64-gce56", vmImage: "openbsd-amd64-56"})
-	// addBuilder(buildConfig{name: "plan9-386-gce", vmImage: "plan9-386"})
+	addBuilder(buildConfig{
+		name:        "openbsd-amd64-gce56",
+		vmImage:     "openbsd-amd64-56",
+		machineType: "n1-highcpu-2",
+	})
+	addBuilder(buildConfig{
+		name:    "plan9-386-gce",
+		vmImage: "plan9-386",
+		// We *were* using n1-standard-1 because Plan 9 can only
+		// reliably use a single CPU. Using 2 or 4, we see
+		// test failures. See:
+		//     https://golang.org/issue/8393
+		//     https://golang.org/issue/9491
+		// n1-standard-1 has 3.6 GB of memory, which is
+		// overkill (userspace probably only sees 2 GB anyway),
+		// but it's the cheapest option, and plenty to keep
+		// our ~250 MB of inputs+outputs in its ramfs.
+		//
+		// But the docs say: "For the n1 series of machine
+		// types, a virtual CPU is implemented as a single
+		// hyperthread on a 2.6GHz Intel Sandy Bridge Xeon or
+		// Intel Ivy Bridge Xeon (or newer) processor. This
+		// means that the n1-standard-2 machine type will see
+		// a whole physical core."
+		//
+		// ... so we use n1-highcpu-2 (1.80 GB RAM, still
+		// plenty), just so we can get 1 whole core for the
+		// single-core Plan 9. It will see 2 virtual cores and
+		// only use 1, but we hope that 1 will be more powerful
+		// and we'll stop timing out on tests.
+		machineType: "n1-highcpu-2",
+	})
 
 	addWatcher(watchConfig{repo: "https://go.googlesource.com/go", dash: "https://build.golang.org/"})
 	// TODO(adg,cmang): fix gccgo watcher
@@ -318,6 +358,21 @@ func markDone(work builderRev) {
 	statusDone = append(statusDone, st)
 }
 
+func vmIsBuilding(instName string) bool {
+	if instName == "" {
+		log.Printf("bogus empty instance name passed to vmIsBuilding")
+		return false
+	}
+	statusMu.Lock()
+	defer statusMu.Unlock()
+	for _, st := range status {
+		if st.instName == instName {
+			return true
+		}
+	}
+	return false
+}
+
 // statusPtrStr disambiguates which status to return if there are
 // multiple in the history (e.g. recent failures where the build
 // didn't finish for reasons outside of all.bash failing)
@@ -710,6 +765,15 @@ func startBuildingInDocker(conf buildConfig, rev string) (*buildStatus, error) {
 
 var osArchRx = regexp.MustCompile(`^(\w+-\w+)`)
 
+func randHex(n int) string {
+	buf := make([]byte, n/2)
+	_, err := rand.Read(buf)
+	if err != nil {
+		panic("Failed to get randomness: " + err.Error())
+	}
+	return fmt.Sprintf("%x", buf)
+}
+
 // startBuildingInVM starts a VM on GCE running the buildlet binary to build rev.
 func startBuildingInVM(conf buildConfig, rev string) (*buildStatus, error) {
 	brev := builderRev{
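One subtlety in randHex, noted here rather than in the diff: buf is n/2 bytes and each byte formats as two hex digits, so the result has exactly n characters only when n is even; integer division truncates, so randHex(7) would return 6 characters. The call site below uses randHex(6), so this is fine. A quick self-contained illustration:

package main

import (
	"crypto/rand"
	"fmt"
)

func randHex(n int) string {
	buf := make([]byte, n/2)
	_, err := rand.Read(buf)
	if err != nil {
		panic("Failed to get randomness: " + err.Error())
	}
	return fmt.Sprintf("%x", buf)
}

func main() {
	fmt.Println(len(randHex(6))) // 6: three random bytes, two hex digits each
	fmt.Println(len(randHex(7))) // 6, not 7: n/2 truncates
}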
@@ -723,7 +787,7 @@ func startBuildingInVM(conf buildConfig, rev string) (*buildStatus, error) {
 
 	// name is the project-wide unique name of the GCE instance. It can't be longer
 	// than 61 bytes, so we only use the first 8 bytes of the rev.
-	name := "buildlet-" + conf.name + "-" + rev[:8]
+	name := "buildlet-" + conf.name + "-" + rev[:8] + "-rn" + randHex(6)
 
 	// buildletURL is the URL of the buildlet binary which the VMs
 	// are configured to download at boot and run. This lets us
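Back-of-the-envelope check on the 61-byte limit (an illustration, not code from this change): the new suffix adds 9 bytes, "-rn" plus the 6 hex characters from randHex(6), so even the longest VM builder name in this change stays well under the cap:

package main

import "fmt"

func main() {
	// Longest VM builder name added in this change, with a placeholder
	// rev and a fixed stand-in for the randHex(6) suffix.
	confName := "openbsd-amd64-gce56"
	rev := "0123456789abcdef"
	name := "buildlet-" + confName + "-" + rev[:8] + "-rn" + "a1b2c3"
	fmt.Println(name, len(name)) // buildlet-openbsd-amd64-gce56-01234567-rna1b2c3 46
}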
@@ -776,7 +840,7 @@ func startBuildingInVM(conf buildConfig, rev string) (*buildStatus, error) {
 				// that killing.
 				{
 					Key:   "delete-at",
-					Value: fmt.Sprint(time.Now().Add(30 * time.Minute).Unix()),
+					Value: fmt.Sprint(time.Now().Add(vmDeleteTimeout).Unix()),
 				},
 			},
 		},
@@ -816,14 +880,16 @@ func startBuildingInVM(conf buildConfig, rev string) (*buildStatus, error) {
 }
 
 // watchVM monitors a VM doing a build.
-func watchVM(st *buildStatus) (err error) {
+func watchVM(st *buildStatus) (retErr error) {
 	goodRes := func(res *http.Response, err error, what string) bool {
 		if err != nil {
-			err = fmt.Errorf("%s: %v", what, err)
+			retErr = fmt.Errorf("%s: %v", what, err)
 			return false
 		}
 		if res.StatusCode/100 != 2 {
-			err = fmt.Errorf("%s: %v", what, res.Status)
+			slurp, _ := ioutil.ReadAll(io.LimitReader(res.Body, 4<<10))
+			retErr = fmt.Errorf("%s: %v; body: %s", what, res.Status, slurp)
+			res.Body.Close()
 			return false
 		}
 		return true
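This rename is the watchVM bug fix from the commit message: the closure's err parameter shadowed the function's named return value err, so assignments inside goodRes never reached watchVM's caller and errors were silently dropped. Renaming the named return to retErr removes the shadowing (and the 4 KB body slurp makes non-2xx failures actionable). A toy demonstration of the pitfall, separate from the coordinator code:

package main

import "fmt"

// broken mirrors the old watchVM: the closure's err parameter shadows
// the named return value, so the assignment is lost.
func broken() (err error) {
	check := func(err error) bool {
		if err != nil {
			err = fmt.Errorf("wrapped: %v", err) // assigns to the parameter, not the return value
			return false
		}
		return true
	}
	check(fmt.Errorf("boom"))
	return // returns nil: the wrapped error was dropped
}

// fixed mirrors the new watchVM: distinct names, so the closure can
// assign to the function's named return value.
func fixed() (retErr error) {
	check := func(err error) bool {
		if err != nil {
			retErr = fmt.Errorf("wrapped: %v", err)
			return false
		}
		return true
	}
	check(fmt.Errorf("boom"))
	return // returns the wrapped error
}

func main() {
	fmt.Println(broken()) // <nil>
	fmt.Println(fixed())  // wrapped: boom
}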
@@ -1241,12 +1307,18 @@ func cleanZoneVMs(zone string) error {
 		return fmt.Errorf("listing instances: %v", err)
 	}
 	for _, inst := range list.Items {
 		if !strings.HasPrefix(inst.Name, "buildlet-") {
 			// We only delete ones we created.
 			continue
 		}
+		if inst.Metadata == nil {
+			// Defensive. Not seen in practice.
+			continue
+		}
+		sawDeleteAt := false
 		for _, it := range inst.Metadata.Items {
 			if it.Key == "delete-at" {
+				sawDeleteAt = true
 				unixDeadline, err := strconv.ParseInt(it.Value, 10, 64)
 				if err != nil {
 					log.Printf("invalid delete-at value %q seen; ignoring", it.Value)
@@ -1257,6 +1329,10 @@ func cleanZoneVMs(zone string) error {
 				}
 			}
 		}
+		if sawDeleteAt && !vmIsBuilding(inst.Name) {
+			log.Printf("Deleting VM %q in zone %q from an earlier coordinator generation ...", inst.Name, zone)
+			deleteVM(zone, inst.Name)
+		}
 	}
 	return nil
 }
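Taken together, the cleanup rules are: only delete instances the coordinator created (buildlet- prefix), only ones that carry a delete-at marker, and never one that vmIsBuilding reports as in use. A hypothetical condensation of that predicate (shouldDelete is not a function in this change):

package main

import (
	"fmt"
	"strings"
)

// shouldDelete condenses the checks cleanZoneVMs now applies before
// deleting a VM from an earlier coordinator generation.
func shouldDelete(name string, sawDeleteAt, building bool) bool {
	return strings.HasPrefix(name, "buildlet-") && sawDeleteAt && !building
}

func main() {
	fmt.Println(shouldDelete("buildlet-plan9-386-gce-01234567-rna1b2c3", true, false)) // true
	fmt.Println(shouldDelete("buildlet-plan9-386-gce-01234567-rna1b2c3", true, true))  // false: still building
	fmt.Println(shouldDelete("someones-pet-vm", true, false))                          // false: not ours
}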