diff --git a/.hgignore b/.hgignore index c27ee1ef45..47c82babf0 100644 --- a/.hgignore +++ b/.hgignore @@ -1,3 +1,7 @@ # Add no patterns to .hgignore except for files generated by the build. syntax:glob last-change +dashboard/coordinator/buildongce/client-*.dat +dashboard/coordinator/buildongce/token.dat +dashboard/coordinator/coordinator + diff --git a/dashboard/README b/dashboard/README index d599f3d063..2d79ab7654 100644 --- a/dashboard/README +++ b/dashboard/README @@ -4,8 +4,12 @@ The files in this directory constitute the continuous builder: -app/: an AppEngine server +app/: an AppEngine server. The code that runs http://build.golang.org/ builder/: gobuilder, a Go continuous build client +coordinator/: daemon that runs on CoreOS on Google Compute Engine and manages + builds (using the builder in single-shot mode) in Docker containers. +env/: configuration files describing the environment of builders. + Many builders are still configured ad-hoc. If you wish to run a Go builder, please email golang-dev@googlegroups.com diff --git a/dashboard/coordinator/buildongce/create.go b/dashboard/coordinator/buildongce/create.go new file mode 100644 index 0000000000..005e292821 --- /dev/null +++ b/dashboard/coordinator/buildongce/create.go @@ -0,0 +1,213 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package main + +import ( + "bufio" + "encoding/json" + "flag" + "fmt" + "io/ioutil" + "log" + "net/http" + "os" + "strings" + "time" + + "code.google.com/p/goauth2/oauth" + compute "code.google.com/p/google-api-go-client/compute/v1" + storage "code.google.com/p/google-api-go-client/storage/v1" +) + +var ( + proj = flag.String("project", "symbolic-datum-552", "name of Project") + zone = flag.String("zone", "us-central1-a", "GCE zone") + mach = flag.String("machinetype", "n1-standard-16", "Machine type") + instName = flag.String("instance_name", "go-builder", "Name of VM instance.") + sshPub = flag.String("ssh_public_key", "", "ssh public key file to authorize. Can modify later in Google's web UI anyway.") +) + +func readFile(v string) string { + slurp, err := ioutil.ReadFile(v) + if err != nil { + log.Fatalf("Error reading %s: %v", v, err) + } + return strings.TrimSpace(string(slurp)) +} + +var config = &oauth.Config{ + // The client-id and secret should be for an "Installed Application" when using + // the CLI. Later we'll use a web application with a callback. 
+ ClientId: readFile("client-id.dat"), + ClientSecret: readFile("client-secret.dat"), + Scope: strings.Join([]string{ + compute.DevstorageFull_controlScope, + compute.ComputeScope, + "https://www.googleapis.com/auth/sqlservice", + "https://www.googleapis.com/auth/sqlservice.admin", + }, " "), + AuthURL: "https://accounts.google.com/o/oauth2/auth", + TokenURL: "https://accounts.google.com/o/oauth2/token", + RedirectURL: "urn:ietf:wg:oauth:2.0:oob", +} + +const baseConfig = `#cloud-config +coreos: + units: + - name: gobuild.service + command: start + content: | + [Unit] + Description=Go Builders + After=docker.service + Requires=docker.service + + [Service] + ExecStartPre=/bin/bash -c 'mkdir -p /opt/bin && curl -s -o /opt/bin/coordinator http://storage.googleapis.com/go-builder-data/coordinator && chmod +x /opt/bin/coordinator' + ExecStart=/opt/bin/coordinator + RestartSec=10s + Restart=always + Type=simple + + [Install] + WantedBy=multi-user.target +` + +func main() { + flag.Parse() + if *proj == "" { + log.Fatalf("Missing --project flag") + } + prefix := "https://www.googleapis.com/compute/v1/projects/" + *proj + imageURL := "https://www.googleapis.com/compute/v1/projects/coreos-cloud/global/images/coreos-alpha-402-2-0-v20140807" + machType := prefix + "/zones/" + *zone + "/machineTypes/" + *mach + + tr := &oauth.Transport{ + Config: config, + } + + tokenCache := oauth.CacheFile("token.dat") + token, err := tokenCache.Token() + if err != nil { + log.Printf("Error getting token from %s: %v", string(tokenCache), err) + log.Printf("Get auth code from %v", config.AuthCodeURL("my-state")) + fmt.Print("\nEnter auth code: ") + sc := bufio.NewScanner(os.Stdin) + sc.Scan() + authCode := strings.TrimSpace(sc.Text()) + token, err = tr.Exchange(authCode) + if err != nil { + log.Fatalf("Error exchanging auth code for a token: %v", err) + } + tokenCache.PutToken(token) + } + + tr.Token = token + oauthClient := &http.Client{Transport: tr} + computeService, _ := 
compute.New(oauthClient) + storageService, _ := storage.New(oauthClient) + _ = storageService // TODO? + + cloudConfig := baseConfig + if *sshPub != "" { + key := strings.TrimSpace(readFile(*sshPub)) + cloudConfig += fmt.Sprintf("\nssh_authorized_keys:\n - %s\n", key) + } + if os.Getenv("USER") == "bradfitz" { + cloudConfig += fmt.Sprintf("\nssh_authorized_keys:\n - %s\n", "ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAIEAwks9dwWKlRC+73gRbvYtVg0vdCwDSuIlyt4z6xa/YU/jTDynM4R4W10hm2tPjy8iR1k8XhDv4/qdxe6m07NjG/By1tkmGpm1mGwho4Pr5kbAAy/Qg+NLCSdAYnnE00FQEcFOC15GFVMOW2AzDGKisReohwH9eIzHPzdYQNPRWXE= bradfitz@papag.bradfitz.com") + } + const maxCloudConfig = 32 << 10 // per compute API docs + if len(cloudConfig) > maxCloudConfig { + log.Fatalf("cloud config length of %d bytes is over %d byte limit", len(cloudConfig), maxCloudConfig) + } + + instance := &compute.Instance{ + Name: *instName, + Description: "Go Builder", + MachineType: machType, + Disks: []*compute.AttachedDisk{ + { + AutoDelete: true, + Boot: true, + Type: "PERSISTENT", + InitializeParams: &compute.AttachedDiskInitializeParams{ + DiskName: *instName + "-coreos-stateless-pd", + SourceImage: imageURL, + DiskSizeGb: 100, + }, + }, + }, + Tags: &compute.Tags{ + Items: []string{"http-server", "https-server"}, + }, + Metadata: &compute.Metadata{ + Items: []*compute.MetadataItems{ + { + Key: "user-data", + Value: cloudConfig, + }, + }, + }, + NetworkInterfaces: []*compute.NetworkInterface{ + &compute.NetworkInterface{ + AccessConfigs: []*compute.AccessConfig{ + &compute.AccessConfig{ + Type: "ONE_TO_ONE_NAT", + Name: "External NAT", + }, + }, + Network: prefix + "/global/networks/default", + }, + }, + ServiceAccounts: []*compute.ServiceAccount{ + { + Email: "default", + Scopes: []string{ + compute.DevstorageFull_controlScope, + compute.ComputeScope, + }, + }, + }, + } + + log.Printf("Creating instance...") + op, err := computeService.Instances.Insert(*proj, *zone, instance).Do() + if err != nil { + log.Fatalf("Failed to create 
instance: %v", err) + } + opName := op.Name + log.Printf("Created. Waiting on operation %v", opName) +OpLoop: + for { + time.Sleep(2 * time.Second) + op, err := computeService.ZoneOperations.Get(*proj, *zone, opName).Do() + if err != nil { + log.Fatalf("Failed to get op %s: %v", opName, err) + } + switch op.Status { + case "PENDING", "RUNNING": + log.Printf("Waiting on operation %v", opName) + continue + case "DONE": + if op.Error != nil { + for _, operr := range op.Error.Errors { + log.Printf("Error: %+v", operr) + } + log.Fatalf("Failed to start.") + } + log.Printf("Success. %+v", op) + break OpLoop + default: + log.Fatalf("Unknown status %q: %+v", op.Status, op) + } + } + + inst, err := computeService.Instances.Get(*proj, *zone, *instName).Do() + if err != nil { + log.Fatalf("Error getting instance after creation: %v", err) + } + ij, _ := json.MarshalIndent(inst, "", " ") + log.Printf("Instance: %s", ij) +} diff --git a/dashboard/coordinator/main.go b/dashboard/coordinator/main.go new file mode 100644 index 0000000000..7fb004d592 --- /dev/null +++ b/dashboard/coordinator/main.go @@ -0,0 +1,421 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The coordinator runs on GCE and coordinates builds in Docker containers. +package main + +import ( + "bytes" + "crypto/hmac" + "crypto/md5" + "encoding/json" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "net/http" + "os" + "os/exec" + "sort" + "strings" + "sync" + "time" +) + +var ( + masterKeyFile = flag.String("masterkey", "", "Path to builder master key. Else fetched using GCE project attribute 'builder-master-key'.") + maxBuilds = flag.Int("maxbuilds", 6, "Max concurrent builds") + + // Debug flags: + addTemp = flag.Bool("temp", false, "Append -temp to all builders.") + just = flag.String("just", "", "If non-empty, run single build in the foreground. 
Requires rev.") + rev = flag.String("rev", "", "Revision to build.") +) + +var ( + startTime = time.Now() + builders = map[string]buildConfig{} // populated once at startup + donec = make(chan builderRev) // reports of finished builders + + statusMu sync.Mutex + status = map[builderRev]*buildStatus{} +) + +type imageInfo struct { + url string // of tar file + + mu sync.Mutex + lastMod string +} + +var images = map[string]*imageInfo{ + "gobuilders/linux-x86-base": {url: "https://storage.googleapis.com/go-builder-data/docker-linux.base.tar.gz"}, + "gobuilders/linux-x86-nacl": {url: "https://storage.googleapis.com/go-builder-data/docker-linux.nacl.tar.gz"}, +} + +type buildConfig struct { + name string // "linux-amd64-race" + image string // Docker image to use to build + cmd string // optional -cmd flag (relative to go/src/) + env []string // extra environment ("key=value") pairs +} + +func main() { + flag.Parse() + addBuilder(buildConfig{name: "linux-386"}) + addBuilder(buildConfig{name: "linux-386-387", env: []string{"GO386=387"}}) + addBuilder(buildConfig{name: "linux-amd64"}) + addBuilder(buildConfig{name: "linux-amd64-race"}) + addBuilder(buildConfig{name: "nacl-386"}) + addBuilder(buildConfig{name: "nacl-amd64p32"}) + + if (*just != "") != (*rev != "") { + log.Fatalf("--just and --rev must be used together") + } + if *just != "" { + conf, ok := builders[*just] + if !ok { + log.Fatalf("unknown builder %q", *just) + } + cmd := exec.Command("docker", append([]string{"run"}, conf.dockerRunArgs(*rev)...)...) 
+ cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + log.Fatalf("Build failed: %v", err) + } + return + } + + http.HandleFunc("/", handleStatus) + http.HandleFunc("/logs", handleLogs) + go http.ListenAndServe(":80", nil) + + workc := make(chan builderRev) + for name := range builders { + go findWorkLoop(name, workc) + } + + ticker := time.NewTicker(1 * time.Minute) + for { + select { + case work := <-workc: + log.Printf("workc received %+v; len(status) = %v, maxBuilds = %v; cur = %p", work, len(status), *maxBuilds, status[work]) + mayBuild := mayBuildRev(work) + if mayBuild { + out, _ := exec.Command("docker", "ps").Output() + numBuilds := bytes.Count(out, []byte("\n")) - 1 + log.Printf("num current docker builds: %d", numBuilds) + if numBuilds > *maxBuilds { + mayBuild = false + } + } + if mayBuild { + if st, err := startBuilding(builders[work.name], work.rev); err == nil { + setStatus(work, st) + log.Printf("%v now building in %v", work, st.container) + } else { + log.Printf("Error starting to build %v: %v", work, err) + } + } + case done := <-donec: + log.Printf("%v done", done) + setStatus(done, nil) + case <-ticker.C: + if numCurrentBuilds() == 0 && time.Now().After(startTime.Add(10*time.Minute)) { + // TODO: halt the whole machine to kill the VM or something + } + } + } +} + +func numCurrentBuilds() int { + statusMu.Lock() + defer statusMu.Unlock() + return len(status) +} + +func mayBuildRev(work builderRev) bool { + statusMu.Lock() + defer statusMu.Unlock() + return len(status) < *maxBuilds && status[work] == nil +} + +func setStatus(work builderRev, st *buildStatus) { + statusMu.Lock() + defer statusMu.Unlock() + if st == nil { + delete(status, work) + } else { + status[work] = st + } +} + +func getStatus(work builderRev) *buildStatus { + statusMu.Lock() + defer statusMu.Unlock() + return status[work] +} + +type byAge []*buildStatus + +func (s byAge) Len() int { return len(s) } +func (s byAge) Less(i, j int) bool { return 
s[i].start.Before(s[j].start) } +func (s byAge) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +func handleStatus(w http.ResponseWriter, r *http.Request) { + var active []*buildStatus + statusMu.Lock() + for _, st := range status { + active = append(active, st) + } + statusMu.Unlock() + + fmt.Fprintf(w, "

<html><body><h1>Go build coordinator</h1>%d of max %d builds running:<p><pre>

", len(status), *maxBuilds)
+	sort.Sort(byAge(active))
+	for _, st := range active {
+		fmt.Fprintf(w, "%-22s hg %s in container <a href='/logs?name=%s&rev=%s'>%s</a>, %v ago\n", st.name, st.rev, st.name, st.rev,
+			st.container, time.Now().Sub(st.start))
+	}
+	fmt.Fprintf(w, "
") +} + +func handleLogs(w http.ResponseWriter, r *http.Request) { + st := getStatus(builderRev{r.FormValue("name"), r.FormValue("rev")}) + if st == nil { + fmt.Fprintf(w, "

<html><body><h3>not building</h3>

") + return + } + out, err := exec.Command("docker", "logs", st.container).CombinedOutput() + if err != nil { + log.Print(err) + http.Error(w, "Error fetching logs. Already finished?", 500) + return + } + key := builderKey(st.name) + logs := strings.Replace(string(out), key, "BUILDERKEY", -1) + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + io.WriteString(w, logs) +} + +func findWorkLoop(builderName string, work chan<- builderRev) { + // TODO: make this better + for { + rev, err := findWork(builderName) + if err != nil { + log.Printf("Finding work for %s: %v", builderName, err) + } else if rev != "" { + work <- builderRev{builderName, rev} + } + time.Sleep(60 * time.Second) + } +} + +func findWork(builderName string) (rev string, err error) { + var jres struct { + Response struct { + Kind string + Data struct { + Hash string + PerfResults []string + } + } + } + res, err := http.Get("https://build.golang.org/todo?builder=" + builderName + "&kind=build-go-commit") + if err != nil { + return + } + defer res.Body.Close() + if res.StatusCode != 200 { + return "", fmt.Errorf("unexpected http status %d", res.StatusCode) + } + err = json.NewDecoder(res.Body).Decode(&jres) + if jres.Response.Kind == "build-go-commit" { + rev = jres.Response.Data.Hash + } + return rev, err +} + +type builderRev struct { + name, rev string +} + +// returns the part after "docker run" +func (conf buildConfig) dockerRunArgs(rev string) (args []string) { + if key := builderKey(conf.name); key != "" { + tmpKey := "/tmp/" + conf.name + ".buildkey" + if _, err := os.Stat(tmpKey); err != nil { + if err := ioutil.WriteFile(tmpKey, []byte(key), 0600); err != nil { + log.Fatal(err) + } + } + args = append(args, "-v", tmpKey+":/.gobuildkey") + } + for _, pair := range conf.env { + args = append(args, "-e", pair) + } + args = append(args, + conf.image, + "/usr/local/bin/builder", + "-rev="+rev, + "-buildroot=/", + "-v", + ) + if conf.cmd != "" { + args = append(args, "-cmd", conf.cmd) + } 
+ args = append(args, conf.name) + return +} + +func addBuilder(c buildConfig) { + if c.name == "" { + panic("empty name") + } + if *addTemp { + c.name += "-temp" + } + if _, dup := builders[c.name]; dup { + panic("dup name") + } + if strings.HasPrefix(c.name, "nacl-") { + if c.image == "" { + c.image = "gobuilders/linux-x86-nacl" + } + if c.cmd == "" { + c.cmd = "/usr/local/bin/build-command.pl" + } + } + if strings.HasPrefix(c.name, "linux-") && c.image == "" { + c.image = "gobuilders/linux-x86-base" + } + if c.image == "" { + panic("empty image") + } + builders[c.name] = c +} + +func condUpdateImage(img string) error { + ii := images[img] + if ii == nil { + log.Fatalf("Image %q not described.", img) + } + ii.mu.Lock() + defer ii.mu.Unlock() + res, err := http.Head(ii.url) + if err != nil { + return fmt.Errorf("Error checking %s: %v", ii.url, err) + } + if res.StatusCode != 200 { + return fmt.Errorf("Error checking %s: %v", ii.url, res.Status) + } + if res.Header.Get("Last-Modified") == ii.lastMod { + return nil + } + + res, err = http.Get(ii.url) + if err != nil || res.StatusCode != 200 { + return fmt.Errorf("Get after Head failed for %s: %v, %v", ii.url, err, res) + } + defer res.Body.Close() + + log.Printf("Running: docker load of %s\n", ii.url) + cmd := exec.Command("docker", "load") + cmd.Stdin = res.Body + + var out bytes.Buffer + cmd.Stdout = &out + cmd.Stderr = &out + + if cmd.Run(); err != nil { + log.Printf("Failed to pull latest %s from %s and pipe into docker load: %v, %s", img, ii.url, err, out.Bytes()) + return err + } + ii.lastMod = res.Header.Get("Last-Modified") + return nil +} + +func startBuilding(conf buildConfig, rev string) (*buildStatus, error) { + if err := condUpdateImage(conf.image); err != nil { + log.Printf("Failed to setup container for %v %v: %v", conf.name, rev, err) + return nil, err + } + + cmd := exec.Command("docker", append([]string{"run", "-d"}, conf.dockerRunArgs(rev)...)...) 
+ all, err := cmd.CombinedOutput() + log.Printf("Docker run for %v %v = err:%v, output:%s", conf.name, rev, err, all) + if err != nil { + return nil, err + } + container := strings.TrimSpace(string(all)) + go func() { + all, err := exec.Command("docker", "wait", container).CombinedOutput() + log.Printf("docker wait %s: %v, %s", container, err, strings.TrimSpace(string(all))) + donec <- builderRev{conf.name, rev} + exec.Command("docker", "rm", container).Run() + }() + return &buildStatus{ + builderRev: builderRev{ + name: conf.name, + rev: rev, + }, + container: container, + start: time.Now(), + }, nil +} + +type buildStatus struct { + builderRev + container string + start time.Time + + mu sync.Mutex + // ... +} + +func builderKey(builder string) string { + master := masterKey() + if len(master) == 0 { + return "" + } + h := hmac.New(md5.New, master) + io.WriteString(h, builder) + return fmt.Sprintf("%x", h.Sum(nil)) +} + +func masterKey() []byte { + keyOnce.Do(loadKey) + return masterKeyCache +} + +var ( + keyOnce sync.Once + masterKeyCache []byte +) + +func loadKey() { + if *masterKeyFile != "" { + b, err := ioutil.ReadFile(*masterKeyFile) + if err != nil { + log.Fatal(err) + } + masterKeyCache = bytes.TrimSpace(b) + return + } + req, _ := http.NewRequest("GET", "http://metadata.google.internal/computeMetadata/v1/project/attributes/builder-master-key", nil) + req.Header.Set("Metadata-Flavor", "Google") + res, err := http.DefaultClient.Do(req) + if err != nil { + log.Fatal("No builder master key available") + } + defer res.Body.Close() + if res.StatusCode != 200 { + log.Fatalf("No builder-master-key project attribute available.") + } + slurp, err := ioutil.ReadAll(res.Body) + if err != nil { + log.Fatal(err) + } + masterKeyCache = bytes.TrimSpace(slurp) +} diff --git a/dashboard/env/linux-x86-base/Dockerfile b/dashboard/env/linux-x86-base/Dockerfile index 711eed50db..6abf6eba34 100644 --- a/dashboard/env/linux-x86-base/Dockerfile +++ 
b/dashboard/env/linux-x86-base/Dockerfile @@ -40,6 +40,6 @@ RUN cd $GOROOT && hg update -C b8ff0ec2a724 RUN cd $GOROOT/src && ./make.bash RUN mkdir -p /usr/local/bin -RUN cd $GO_TOOLS && hg update -C 881a2f3130de +RUN cd $GO_TOOLS && hg update -C 918b8a7e7b1e ENV GOBIN /usr/local/bin RUN /goroot/bin/go install code.google.com/p/go.tools/dashboard/builder diff --git a/dashboard/env/linux-x86-nacl/Dockerfile b/dashboard/env/linux-x86-nacl/Dockerfile index 0aea4a00c6..f7295ccb66 100644 --- a/dashboard/env/linux-x86-nacl/Dockerfile +++ b/dashboard/env/linux-x86-nacl/Dockerfile @@ -43,7 +43,7 @@ RUN cd $GOROOT && hg update -C b8ff0ec2a724 RUN cd $GOROOT/src && ./make.bash RUN mkdir -p /usr/local/bin -RUN cd $GO_TOOLS && hg update -C 881a2f3130de +RUN cd $GO_TOOLS && hg update -C 918b8a7e7b1e ENV GOBIN /usr/local/bin RUN /goroot/bin/go install code.google.com/p/go.tools/dashboard/builder