diff --git a/dashboard/builders.go b/dashboard/builders.go
index edeadc2816..bfa4d25ae2 100644
--- a/dashboard/builders.go
+++ b/dashboard/builders.go
@@ -36,6 +36,30 @@ type BuildConfig struct {
 	tool string // the tool this configuration is for
 }
 
+// GOOS returns the operating system portion of the builder name,
+// which is the text before the first "-" (for a name shaped like
+// "os-arch-suffix" that is "os"). A name without a "-" is returned unchanged.
+func (c *BuildConfig) GOOS() string {
+	i := strings.Index(c.Name, "-")
+	if i == -1 {
+		// No separator: avoid slicing with a negative index, which panics.
+		return c.Name
+	}
+	return c.Name[:i]
+}
+
+// GOARCH returns the architecture portion of the builder name, which
+// is the text between the first and second "-", or everything after
+// the first "-" when there is no second one.
+func (c *BuildConfig) GOARCH() string {
+	arch := c.Name[strings.Index(c.Name, "-")+1:]
+	i := strings.Index(arch, "-")
+	if i == -1 {
+		return arch
+	}
+	return arch[:i]
+}
+
 func (c *BuildConfig) UsesDocker() bool { return c.VMImage == "" }
 func (c *BuildConfig) UsesVM() bool { return c.VMImage != "" }
 
diff --git a/dashboard/builders_test.go b/dashboard/builders_test.go
new file mode 100644
index 0000000000..96effe5afa
--- /dev/null
+++ b/dashboard/builders_test.go
@@ -0,0 +1,25 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package dashboard
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestOSARCHAccessors checks, for every entry in Builders, that GOOS
+// and GOARCH are non-empty, contain no "-", and that "GOOS-GOARCH" is
+// either the whole builder name or is followed by "-" in it.
+func TestOSARCHAccessors(t *testing.T) {
+	valid := func(s string) bool { return s != "" && !strings.Contains(s, "-") }
+	for _, conf := range Builders {
+		os := conf.GOOS()
+		arch := conf.GOARCH()
+		osArch := os + "-" + arch
+		if !valid(os) || !valid(arch) || !(conf.Name == osArch || strings.HasPrefix(conf.Name, osArch+"-")) {
+			t.Errorf("OS+ARCH(%q) = %q, %q; invalid", conf.Name, os, arch)
+		}
+	}
+}
diff --git a/dashboard/buildlet/buildletclient.go b/dashboard/buildlet/buildletclient.go
new file mode 100644
index 0000000000..ae0e87bf9b
--- /dev/null
+++ b/dashboard/buildlet/buildletclient.go
@@ -0,0 +1,72 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build extdep
+
+// Package buildlet contains client tools for working with a buildlet
+// server.
+package buildlet // import "golang.org/x/tools/dashboard/buildlet" + +import ( + "fmt" + "io" + "io/ioutil" + "net/http" + "strings" +) + +// KeyPair is the TLS public certificate PEM file and its associated +// private key PEM file that a builder will use for its HTTPS +// server. The zero value means no HTTPs, which is used by the +// coordinator for machines running within a firewall. +type KeyPair struct { + CertPEM string + KeyPEM string +} + +// NoKeyPair is used by the coordinator to speak http directly to buildlets, +// inside their firewall, without TLS. +var NoKeyPair = KeyPair{} + +// NewClient returns a *Client that will manipulate ipPort, +// authenticated using the provided keypair. +// +// This constructor returns immediately without testing the host or auth. +func NewClient(ipPort string, tls KeyPair) *Client { + return &Client{ + ipPort: ipPort, + tls: tls, + } +} + +// A Client interacts with a single buildlet. +type Client struct { + ipPort string + tls KeyPair +} + +// URL returns the buildlet's URL prefix, without a trailing slash. +func (c *Client) URL() string { + if c.tls != NoKeyPair { + return "http://" + strings.TrimSuffix(c.ipPort, ":80") + } + return "https://" + strings.TrimSuffix(c.ipPort, ":443") +} + +func (c *Client) PutTarball(r io.Reader) error { + req, err := http.NewRequest("PUT", c.URL()+"/writetgz", r) + if err != nil { + return err + } + res, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + defer res.Body.Close() + if res.StatusCode/100 != 2 { + slurp, _ := ioutil.ReadAll(io.LimitReader(res.Body, 4<<10)) + return fmt.Errorf("%v; body: %s", res.Status, slurp) + } + return nil +} diff --git a/dashboard/buildlet/gce.go b/dashboard/buildlet/gce.go new file mode 100644 index 0000000000..325eb354b6 --- /dev/null +++ b/dashboard/buildlet/gce.go @@ -0,0 +1,245 @@ +// Copyright 2015 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build extdep
+
+package buildlet
+
+import (
+	"crypto/tls"
+	"errors"
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+
+	"golang.org/x/oauth2"
+	"golang.org/x/tools/dashboard"
+	"google.golang.org/api/compute/v1"
+)
+
+// VMOpts holds the options StartNewVM uses when creating a GCE VM.
+type VMOpts struct {
+	// Zone is the GCE zone to create the VM in. Required.
+	Zone string
+
+	// ProjectID is the GCE project ID. Required.
+	ProjectID string
+
+	// TLS optionally specifies the TLS keypair to use.
+	// If zero, http without auth is used.
+	TLS KeyPair
+
+	// Optional description of the VM.
+	Description string
+
+	// Optional metadata to put on the instance.
+	Meta map[string]string
+
+	// DeleteIn, if non-zero, sets "delete-at" metadata to the current time plus DeleteIn.
+	DeleteIn time.Duration
+
+	// OnInstanceRequested optionally specifies a hook to run synchronously after the
+	// computeService.Instances.Insert call, but before waiting for its operation to proceed.
+	OnInstanceRequested func()
+
+	// OnInstanceCreated optionally specifies a hook to run synchronously
+	// after the instance operation succeeds.
+	OnInstanceCreated func()
+
+	// OnGotInstanceInfo optionally specifies a hook to run synchronously once the
+	// instance's internal IP is known, before waiting for the buildlet to come up.
+	OnGotInstanceInfo func()
+}
+
+// StartNewVM boots a new GCE VM named instName for builderType, waits for its
+// buildlet to answer over HTTP, and returns a buildlet client configured to speak to it.
+func StartNewVM(ts oauth2.TokenSource, instName, builderType string, opts VMOpts) (*Client, error) {
+	// NOTE(review): the error returned by compute.New is discarded here.
+	computeService, _ := compute.New(oauth2.NewClient(oauth2.NoContext, ts))
+
+	conf, ok := dashboard.Builders[builderType]
+	if !ok {
+		return nil, fmt.Errorf("invalid builder type %q", builderType)
+	}
+
+	zone := opts.Zone
+	if zone == "" {
+		// TODO: automatic? maybe that's not useful.
+		// For now just return an error.
+		return nil, errors.New("buildlet: missing required Zone option")
+	}
+	projectID := opts.ProjectID
+	if projectID == "" {
+		return nil, errors.New("buildlet: missing required ProjectID option")
+	}
+
+	prefix := "https://www.googleapis.com/compute/v1/projects/" + projectID
+	machType := prefix + "/zones/" + zone + "/machineTypes/" + conf.MachineType()
+
+	instance := &compute.Instance{
+		Name:        instName,
+		Description: opts.Description,
+		MachineType: machType,
+		Disks: []*compute.AttachedDisk{
+			{
+				AutoDelete: true,
+				Boot:       true,
+				Type:       "PERSISTENT",
+				InitializeParams: &compute.AttachedDiskInitializeParams{
+					DiskName:    instName,
+					SourceImage: "https://www.googleapis.com/compute/v1/projects/" + projectID + "/global/images/" + conf.VMImage,
+					DiskType:    "https://www.googleapis.com/compute/v1/projects/" + projectID + "/zones/" + zone + "/diskTypes/pd-ssd",
+				},
+			},
+		},
+		Tags: &compute.Tags{
+			// Warning: do NOT list "http-server" or "allow-ssh" (our
+			// project's custom tag to allow ssh access) here; the
+			// buildlet provides full remote code execution.
+			// The https-server is authenticated, though.
+			Items: []string{"https-server"},
+		},
+		Metadata: &compute.Metadata{
+			Items: []*compute.MetadataItems{
+				// The buildlet-binary-url is the URL of the buildlet binary
+				// which the VMs are configured to download at boot and run.
+				// This lets us update the buildlet more easily than
+				// rebuilding the whole VM image.
+				{
+					Key:   "buildlet-binary-url",
+					Value: "http://storage.googleapis.com/go-builder-data/buildlet." + conf.GOOS() + "-" + conf.GOARCH(),
+				},
+			},
+		},
+		NetworkInterfaces: []*compute.NetworkInterface{
+			&compute.NetworkInterface{
+				AccessConfigs: []*compute.AccessConfig{
+					&compute.AccessConfig{
+						Type: "ONE_TO_ONE_NAT",
+						Name: "External NAT",
+					},
+				},
+				Network: prefix + "/global/networks/default",
+			},
+		},
+	}
+
+	if opts.DeleteIn != 0 {
+		// In case the VM gets away from us (generally: if the
+		// coordinator dies while a build is running), then we
+		// set this attribute of when it should be killed so
+		// we can kill it later when the coordinator is
+		// restarted. The cleanUpOldVMs goroutine loop handles
+		// that killing.
+		instance.Metadata.Items = append(instance.Metadata.Items, &compute.MetadataItems{
+			Key:   "delete-at",
+			Value: fmt.Sprint(time.Now().Add(opts.DeleteIn).Unix()),
+		})
+	}
+	for k, v := range opts.Meta {
+		instance.Metadata.Items = append(instance.Metadata.Items, &compute.MetadataItems{
+			Key:   k,
+			Value: v,
+		})
+	}
+
+	op, err := computeService.Instances.Insert(projectID, zone, instance).Do()
+	if err != nil {
+		return nil, fmt.Errorf("Failed to create instance: %v", err)
+	}
+	if fn := opts.OnInstanceRequested; fn != nil {
+		fn()
+	}
+	createOp := op.Name
+
+	// Wait for instance create operation to succeed, polling every 2 seconds.
+OpLoop:
+	for {
+		time.Sleep(2 * time.Second)
+		op, err := computeService.ZoneOperations.Get(projectID, zone, createOp).Do()
+		if err != nil {
+			return nil, fmt.Errorf("Failed to get op %s: %v", createOp, err)
+		}
+		switch op.Status {
+		case "PENDING", "RUNNING":
+			continue
+		case "DONE":
+			if op.Error != nil {
+				for _, operr := range op.Error.Errors {
+					return nil, fmt.Errorf("Error creating instance: %+v", operr)
+				}
+				return nil, errors.New("Failed to start.")
+			}
+			break OpLoop
+		default:
+			return nil, fmt.Errorf("Unknown create status %q: %+v", op.Status, op)
+		}
+	}
+	if fn := opts.OnInstanceCreated; fn != nil {
+		fn()
+	}
+
+	inst, err := computeService.Instances.Get(projectID, zone, instName).Do()
+	if err != nil {
+		return nil, fmt.Errorf("Error getting instance %s details after creation: %v", instName, err)
+	}
+
+	// Find its internal IP: the last interface address starting with "10." is used.
+	var ip string
+	for _, iface := range inst.NetworkInterfaces {
+		if strings.HasPrefix(iface.NetworkIP, "10.") {
+			ip = iface.NetworkIP
+		}
+	}
+	if ip == "" {
+		return nil, errors.New("didn't find its internal IP address")
+	}
+
+	// Wait (up to timeout) for it to boot and its buildlet to answer with HTTP 200.
+	var buildletURL string
+	var ipPort string
+	if opts.TLS != NoKeyPair {
+		buildletURL = "https://" + ip
+		ipPort = ip + ":443"
+	} else {
+		buildletURL = "http://" + ip
+		ipPort = ip + ":80"
+	}
+	if fn := opts.OnGotInstanceInfo; fn != nil {
+		fn()
+	}
+
+	const timeout = 90 * time.Second
+	var alive bool
+	impatientClient := &http.Client{
+		Timeout: 5 * time.Second,
+		Transport: &http.Transport{
+			TLSClientConfig: &tls.Config{
+				InsecureSkipVerify: true,
+			},
+		},
+	}
+	deadline := time.Now().Add(timeout)
+	try := 0
+	for time.Now().Before(deadline) {
+		try++
+		res, err := impatientClient.Get(buildletURL)
+		if err != nil {
+			time.Sleep(1 * time.Second)
+			continue
+		}
+		res.Body.Close()
+		if res.StatusCode != 200 {
+			return nil, fmt.Errorf("buildlet returned HTTP status code %d on try number %d", res.StatusCode, try)
+		}
+		alive = true
+		break
+	}
+	if !alive {
+		return nil, fmt.Errorf("buildlet didn't come up in %v", timeout)
+	}
+
+	return NewClient(ipPort, opts.TLS), nil
+}
diff --git a/dashboard/cmd/coordinator/coordinator.go b/dashboard/cmd/coordinator/coordinator.go
index 2a8ad0821f..40d58fe760 100644
--- a/dashboard/cmd/coordinator/coordinator.go
+++ b/dashboard/cmd/coordinator/coordinator.go
@@ -27,7 +27,6 @@ import (
 	"os"
 	"os/exec"
 	"path"
-	"regexp"
 	"sort"
 	"strconv"
 	"strings"
@@ -37,6 +36,7 @@ import (
 	"golang.org/x/oauth2"
 	"golang.org/x/oauth2/google"
 	"golang.org/x/tools/dashboard"
+	"golang.org/x/tools/dashboard/buildlet"
 	"golang.org/x/tools/dashboard/types"
 	"google.golang.org/api/compute/v1"
 	"google.golang.org/cloud/compute/metadata"
@@ -80,6 +80,7 @@ var (
 	projectZone string
 	computeService *compute.Service
 	externalIP string
+	tokenSource oauth2.TokenSource
 )
 
 func initGCE() error {
@@ -105,8 +106,8 @@ func initGCE() error {
 	if err != nil {
 		return fmt.Errorf("ExternalIP: %v", err)
 	}
-	ts := google.ComputeTokenSource("default")
-	computeService, _ = compute.New(oauth2.NewClient(oauth2.NoContext, ts))
+	tokenSource = google.ComputeTokenSource("default")
+ computeService, _ = compute.New(oauth2.NewClient(oauth2.NoContext, tokenSource)) return nil } @@ -669,8 +670,6 @@ func startBuildingInDocker(conf dashboard.BuildConfig, rev string) (*buildStatus return st, nil } -var osArchRx = regexp.MustCompile(`^(\w+-\w+)`) - func randHex(n int) string { buf := make([]byte, n/2) _, err := rand.Read(buf) @@ -687,95 +686,22 @@ func startBuildingInVM(conf dashboard.BuildConfig, rev string) (*buildStatus, er name: conf.Name, rev: rev, } - st := &buildStatus{ - builderRev: brev, - start: time.Now(), - } - // name is the project-wide unique name of the GCE instance. It can't be longer // than 61 bytes, so we only use the first 8 bytes of the rev. name := "buildlet-" + conf.Name + "-" + rev[:8] + "-rn" + randHex(6) - // buildletURL is the URL of the buildlet binary which the VMs - // are configured to download at boot and run. This lets us - // update the buildlet more easily than rebuilding the whole - // VM image. We put this URL in a well-known GCE metadata attribute. - // The value will be of the form: - // http://storage.googleapis.com/go-builder-data/buildlet.GOOS-GOARCH - m := osArchRx.FindStringSubmatch(conf.Name) - if m == nil { - return nil, fmt.Errorf("invalid builder name %q", conf.Name) + st := &buildStatus{ + builderRev: brev, + start: time.Now(), + instName: name, } - buildletURL := "http://storage.googleapis.com/go-builder-data/buildlet." 
+ m[1] - prefix := "https://www.googleapis.com/compute/v1/projects/" + projectID - machType := prefix + "/zones/" + projectZone + "/machineTypes/" + conf.MachineType() - - instance := &compute.Instance{ - Name: name, - Description: fmt.Sprintf("Go Builder building %s %s", conf.Name, rev), - MachineType: machType, - Disks: []*compute.AttachedDisk{ - { - AutoDelete: true, - Boot: true, - Type: "PERSISTENT", - InitializeParams: &compute.AttachedDiskInitializeParams{ - DiskName: name, - SourceImage: "https://www.googleapis.com/compute/v1/projects/" + projectID + "/global/images/" + conf.VMImage, - DiskType: "https://www.googleapis.com/compute/v1/projects/" + projectID + "/zones/" + projectZone + "/diskTypes/pd-ssd", - }, - }, - }, - Tags: &compute.Tags{ - // Warning: do NOT list "http-server" or "allow-ssh" (our - // project's custom tag to allow ssh access) here; the - // buildlet provides full remote code execution. - Items: []string{}, - }, - Metadata: &compute.Metadata{ - Items: []*compute.MetadataItems{ - { - Key: "buildlet-binary-url", - Value: buildletURL, - }, - // In case the VM gets away from us (generally: if the - // coordinator dies while a build is running), then we - // set this attribute of when it should be killed so - // we can kill it later when the coordinator is - // restarted. The cleanUpOldVMs goroutine loop handles - // that killing. 
- { - Key: "delete-at", - Value: fmt.Sprint(time.Now().Add(vmDeleteTimeout).Unix()), - }, - }, - }, - NetworkInterfaces: []*compute.NetworkInterface{ - &compute.NetworkInterface{ - AccessConfigs: []*compute.AccessConfig{ - &compute.AccessConfig{ - Type: "ONE_TO_ONE_NAT", - Name: "External NAT", - }, - }, - Network: prefix + "/global/networks/default", - }, - }, - } - op, err := computeService.Instances.Insert(projectID, projectZone, instance).Do() - if err != nil { - return nil, fmt.Errorf("Failed to create instance: %v", err) - } - st.createOp = op.Name - st.instName = name - log.Printf("%v now building in VM %v", brev, st.instName) - // Start the goroutine to monitor the VM now that it's booting. This might - // take minutes for it to come up, and then even more time to do the build. go func() { - err := watchVM(st) - if st.hasEvent("instance_created") { - deleteVM(projectZone, st.instName) + err := buildInVM(conf, st) + if err != nil { + if st.hasEvent("instance_created") { + go deleteVM(projectZone, st.instName) + } } st.setDone(err == nil) if err != nil { @@ -786,8 +712,27 @@ func startBuildingInVM(conf dashboard.BuildConfig, rev string) (*buildStatus, er return st, nil } -// watchVM monitors a VM doing a build. 
-func watchVM(st *buildStatus) (retErr error) { +func buildInVM(conf dashboard.BuildConfig, st *buildStatus) (retErr error) { + bc, err := buildlet.StartNewVM(tokenSource, st.instName, conf.Name, buildlet.VMOpts{ + ProjectID: projectID, + Zone: projectZone, + Description: fmt.Sprintf("Go Builder building %s %s", conf.Name, st.rev), + DeleteIn: vmDeleteTimeout, + OnInstanceRequested: func() { + st.logEventTime("instance_create_requested") + log.Printf("%v now booting VM %v for build", st.builderRev, st.instName) + }, + OnInstanceCreated: func() { + st.logEventTime("instance_created") + }, + OnGotInstanceInfo: func() { + st.logEventTime("waiting_for_buildlet") + }, + }) + if err != nil { + return err + } + st.logEventTime("buildlet_up") goodRes := func(res *http.Response, err error, what string) bool { if err != nil { retErr = fmt.Errorf("%s: %v", what, err) @@ -802,82 +747,11 @@ func watchVM(st *buildStatus) (retErr error) { } return true } - st.logEventTime("instance_create_requested") - // Wait for instance create operation to succeed. -OpLoop: - for { - time.Sleep(2 * time.Second) - op, err := computeService.ZoneOperations.Get(projectID, projectZone, st.createOp).Do() - if err != nil { - return fmt.Errorf("Failed to get op %s: %v", st.createOp, err) - } - switch op.Status { - case "PENDING", "RUNNING": - continue - case "DONE": - if op.Error != nil { - for _, operr := range op.Error.Errors { - return fmt.Errorf("Error creating instance: %+v", operr) - } - return errors.New("Failed to start.") - } - break OpLoop - default: - log.Fatalf("Unknown status %q: %+v", op.Status, op) - } - } - st.logEventTime("instance_created") - - inst, err := computeService.Instances.Get(projectID, projectZone, st.instName).Do() - if err != nil { - return fmt.Errorf("Error getting instance %s details after creation: %v", st.instName, err) - } - st.logEventTime("got_instance_info") - - // Find its internal IP. 
- var ip string - for _, iface := range inst.NetworkInterfaces { - if strings.HasPrefix(iface.NetworkIP, "10.") { - ip = iface.NetworkIP - } - } - if ip == "" { - return errors.New("didn't find its internal IP address") - } - - // Wait for it to boot and its buildlet to come up on port 80. - st.logEventTime("waiting_for_buildlet") - buildletURL := "http://" + ip - const numTries = 60 - var alive bool - impatientClient := &http.Client{Timeout: 2 * time.Second} - for i := 1; i <= numTries; i++ { - res, err := impatientClient.Get(buildletURL) - if err != nil { - time.Sleep(1 * time.Second) - continue - } - res.Body.Close() - if res.StatusCode != 200 { - return fmt.Errorf("buildlet returned HTTP status code %d on try number %d", res.StatusCode, i) - } - st.logEventTime("buildlet_up") - alive = true - break - } - if !alive { - return fmt.Errorf("buildlet didn't come up in %d seconds", numTries) - } // Write the VERSION file. st.logEventTime("start_write_version_tar") - verReq, err := http.NewRequest("PUT", buildletURL+"/writetgz", versionTgz(st.rev)) - if err != nil { - return err - } - verRes, err := http.DefaultClient.Do(verReq) - if !goodRes(verRes, err, "writing VERSION tgz") { - return + if err := bc.PutTarball(versionTgz(st.rev)); err != nil { + return fmt.Errorf("writing VERSION tgz: %v", err) } // Feed the buildlet a tar file for it to extract. @@ -889,18 +763,13 @@ OpLoop: } st.logEventTime("start_write_tar") - putReq, err := http.NewRequest("PUT", buildletURL+"/writetgz", tarRes.Body) - if err != nil { + if err := bc.PutTarball(tarRes.Body); err != nil { tarRes.Body.Close() - return err + return fmt.Errorf("writing tarball from Gerrit: %v", err) } - putRes, err := http.DefaultClient.Do(putReq) st.logEventTime("end_write_tar") - tarRes.Body.Close() - if !goodRes(putRes, err, "writing tarball to buildlet") { - return - } + // TODO(bradfitz): add an Exec method to buildlet.Client and update this code. 
// Run the builder cmd := "all.bash" if strings.HasPrefix(st.name, "windows-") { @@ -910,7 +779,7 @@ OpLoop: } execStartTime := time.Now() st.logEventTime("start_exec") - res, err := http.PostForm(buildletURL+"/exec", url.Values{"cmd": {"src/" + cmd}}) + res, err := http.PostForm(bc.URL()+"/exec", url.Values{"cmd": {"src/" + cmd}}) if !goodRes(res, err, "running "+cmd) { return } @@ -958,7 +827,6 @@ type buildStatus struct { container string // container ID for docker, else it's a VM // Immutable, used by VM only: - createOp string // Instances.Insert operation name instName string mu sync.Mutex // guards following