
misc/linkcheck: better redirect handling, use meaningful exit code

Prevent linkcheck from following redirects that lead outside the root URL.

Return a non-zero exit code when there are problems.

Some minor refactoring for clarity.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/14425049
Andrew Gerrand 2013-10-25 17:31:02 +03:00
parent 2d6a13997a
commit e7426010c5

misc/linkcheck/linkcheck.go

@@ -8,11 +8,13 @@
 package main
 
 import (
+	"errors"
 	"flag"
 	"fmt"
 	"io/ioutil"
 	"log"
 	"net/http"
+	"os"
 	"regexp"
 	"strings"
 	"sync"
@@ -101,22 +103,46 @@ func crawl(url string, sourceURL string) {
 
 func addProblem(url, errmsg string) {
 	msg := fmt.Sprintf("Error on %s: %s (from %s)", url, errmsg, linkSources[url])
+	if *verbose {
 		log.Print(msg)
+	}
 	problems = append(problems, msg)
 }
 
 func crawlLoop() {
 	for url := range urlq {
-		res, err := http.Get(url)
+		if err := doCrawl(url); err != nil {
+			addProblem(url, err.Error())
+		}
+	}
+}
+
+func doCrawl(url string) error {
+	defer wg.Done()
+
+	req, err := http.NewRequest("GET", url, nil)
 	if err != nil {
-			addProblem(url, fmt.Sprintf("Error fetching: %v", err))
-			wg.Done()
-			continue
+		return err
+	}
+	res, err := http.DefaultTransport.RoundTrip(req)
+	if err != nil {
+		return err
+	}
+	// Handle redirects.
+	if res.StatusCode/100 == 3 {
+		newURL, err := res.Location()
+		if err != nil {
+			return fmt.Errorf("resolving redirect: %v", err)
+		}
+		if !strings.HasPrefix(newURL.String(), *root) {
+			// Skip off-site redirects.
+			return nil
+		}
+		crawl(newURL.String(), url)
+		return nil
 	}
 	if res.StatusCode != 200 {
-			addProblem(url, fmt.Sprintf("Status code = %d", res.StatusCode))
-			wg.Done()
-			continue
+		return errors.New(res.Status)
 	}
 	slurp, err := ioutil.ReadAll(res.Body)
 	res.Body.Close()
@@ -141,9 +167,7 @@ func crawlLoop() {
 		}
 		fragExists[urlFrag{url, id}] = true
 	}
-
-		wg.Done()
-	}
+	return nil
 }
 
 func main() {
@@ -151,7 +175,6 @@ func main() {
 
 	go crawlLoop()
 	crawl(*root, "")
-	crawl(*root+"/doc/go1.1.html", "")
 
 	wg.Wait()
 	close(urlq)
@@ -164,4 +187,7 @@ func main() {
 	for _, s := range problems {
 		fmt.Println(s)
 	}
+	if len(problems) > 0 {
+		os.Exit(1)
+	}
 }
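
For context, the heart of the change is that doCrawl fetches with http.DefaultTransport.RoundTrip rather than http.Get, so 3xx responses reach the crawler itself instead of being followed automatically; the Location target is then crawled only when it stays under the root URL. The standalone sketch below illustrates that technique. It is not part of this CL, and the root and start URLs in it are placeholders.

// redirectprobe.go: a minimal, self-contained sketch (not from the CL) of the
// redirect-handling approach used above. http.DefaultTransport.RoundTrip performs
// a single HTTP transaction and never follows redirects, unlike http.Get, so the
// caller can inspect 3xx responses and decide whether the target stays on-site.
package main

import (
	"fmt"
	"log"
	"net/http"
	"strings"
)

// probe reports whether url redirects and, if so, whether the target is under root.
func probe(url, root string) error {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return err
	}
	res, err := http.DefaultTransport.RoundTrip(req)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	if res.StatusCode/100 == 3 {
		// Location resolves the redirect target relative to the request URL.
		dest, err := res.Location()
		if err != nil {
			return fmt.Errorf("resolving redirect: %v", err)
		}
		if !strings.HasPrefix(dest.String(), root) {
			fmt.Printf("%s redirects off-site to %s; a crawler would skip it\n", url, dest)
			return nil
		}
		fmt.Printf("%s redirects on-site to %s; a crawler would follow it\n", url, dest)
		return nil
	}
	fmt.Printf("%s: %s\n", url, res.Status)
	return nil
}

func main() {
	// Placeholder root and start URL, standing in for linkcheck's -root flag.
	const root = "https://golang.org"
	if err := probe(root+"/doc", root); err != nil {
		log.Fatal(err)
	}
}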