From 57dac327d15b4debe33057b9ca785e303731e81c Mon Sep 17 00:00:00 2001
From: Sean Liao
Date: Wed, 19 Mar 2025 16:34:43 +0000
Subject: [PATCH] misc/linkcheck: remove unused tool

Fixes #72953

Change-Id: I8a0c8da3f8309841147412a078bc82095a93c5b9
Reviewed-on: https://go-review.googlesource.com/c/go/+/659275
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Ian Lance Taylor
Auto-Submit: Ian Lance Taylor
Reviewed-by: Michael Pratt
---
 misc/linkcheck/linkcheck.go | 191 ------------------------------------
 1 file changed, 191 deletions(-)
 delete mode 100644 misc/linkcheck/linkcheck.go

diff --git a/misc/linkcheck/linkcheck.go b/misc/linkcheck/linkcheck.go
deleted file mode 100644
index efe400965b..0000000000
--- a/misc/linkcheck/linkcheck.go
+++ /dev/null
@@ -1,191 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// The linkcheck command finds missing links in the godoc website.
-// It crawls a URL recursively and notes URLs and URL fragments
-// that it's seen and prints a report of missing links at the end.
-package main
-
-import (
-	"errors"
-	"flag"
-	"fmt"
-	"io"
-	"log"
-	"net/http"
-	"os"
-	"regexp"
-	"strings"
-	"sync"
-)
-
-var (
-	root    = flag.String("root", "http://localhost:6060", "Root to crawl")
-	verbose = flag.Bool("verbose", false, "verbose")
-)
-
-var wg sync.WaitGroup        // outstanding fetches
-var urlq = make(chan string) // URLs to crawl
-
-// urlFrag is a URL and its optional #fragment (without the #)
-type urlFrag struct {
-	url, frag string
-}
-
-var (
-	mu          sync.Mutex
-	crawled     = make(map[string]bool)      // URL without fragment -> true
-	neededFrags = make(map[urlFrag][]string) // URL#frag -> who needs it
-)
-
-var aRx = regexp.MustCompile(`<a href=['"]?(/[^\s'">]+)`)
-
-// Owned by crawlLoop goroutine:
-var (
-	linkSources = make(map[string][]string) // url no fragment -> sources
-	fragExists  = make(map[urlFrag]bool)
-	problems    []string
-)
-
-func localLinks(body string) (links []string) {
-	seen := map[string]bool{}
-	mv := aRx.FindAllStringSubmatch(body, -1)
-	for _, m := range mv {
-		ref := m[1]
-		if strings.HasPrefix(ref, "/src/") {
-			continue
-		}
-		if !seen[ref] {
-			seen[ref] = true
-			links = append(links, m[1])
-		}
-	}
-	return
-}
-
-var idRx = regexp.MustCompile(`\bid=['"]?([^\s'">]+)`)
-
-func pageIDs(body string) (ids []string) {
-	mv := idRx.FindAllStringSubmatch(body, -1)
-	for _, m := range mv {
-		ids = append(ids, m[1])
-	}
-	return
-}
-
-// url may contain a #fragment, and the fragment is then noted as needing to exist.
-func crawl(url string, sourceURL string) {
-	if strings.Contains(url, "/devel/release") {
-		return
-	}
-	mu.Lock()
-	defer mu.Unlock()
-	if u, frag, ok := strings.Cut(url, "#"); ok {
-		url = u
-		if frag != "" {
-			uf := urlFrag{url, frag}
-			neededFrags[uf] = append(neededFrags[uf], sourceURL)
-		}
-	}
-	if crawled[url] {
-		return
-	}
-	crawled[url] = true
-
-	wg.Add(1)
-	go func() {
-		urlq <- url
-	}()
-}
-
-func addProblem(url, errmsg string) {
-	msg := fmt.Sprintf("Error on %s: %s (from %s)", url, errmsg, linkSources[url])
-	if *verbose {
-		log.Print(msg)
-	}
-	problems = append(problems, msg)
-}
-
-func crawlLoop() {
-	for url := range urlq {
-		if err := doCrawl(url); err != nil {
-			addProblem(url, err.Error())
-		}
-	}
-}
-
-func doCrawl(url string) error {
-	defer wg.Done()
-
-	req, err := http.NewRequest("GET", url, nil)
-	if err != nil {
-		return err
-	}
-	res, err := http.DefaultTransport.RoundTrip(req)
-	if err != nil {
-		return err
-	}
-	// Handle redirects.
-	if res.StatusCode/100 == 3 {
-		newURL, err := res.Location()
-		if err != nil {
-			return fmt.Errorf("resolving redirect: %v", err)
-		}
-		if !strings.HasPrefix(newURL.String(), *root) {
-			// Skip off-site redirects.
-			return nil
-		}
-		crawl(newURL.String(), url)
-		return nil
-	}
-	if res.StatusCode != 200 {
-		return errors.New(res.Status)
-	}
-	slurp, err := io.ReadAll(res.Body)
-	res.Body.Close()
-	if err != nil {
-		log.Fatalf("Error reading %s body: %v", url, err)
-	}
-	if *verbose {
-		log.Printf("Len of %s: %d", url, len(slurp))
-	}
-	body := string(slurp)
-	for _, ref := range localLinks(body) {
-		if *verbose {
-			log.Printf("  links to %s", ref)
-		}
-		dest := *root + ref
-		linkSources[dest] = append(linkSources[dest], url)
-		crawl(dest, url)
-	}
-	for _, id := range pageIDs(body) {
-		if *verbose {
-			log.Printf(" url %s has #%s", url, id)
-		}
-		fragExists[urlFrag{url, id}] = true
-	}
-	return nil
-}
-
-func main() {
-	flag.Parse()
-
-	go crawlLoop()
-	crawl(*root, "")
-
-	wg.Wait()
-	close(urlq)
-	for uf, needers := range neededFrags {
-		if !fragExists[uf] {
-			problems = append(problems, fmt.Sprintf("Missing fragment for %+v from %v", uf, needers))
-		}
-	}
-
-	for _, s := range problems {
-		fmt.Println(s)
-	}
-	if len(problems) > 0 {
-		os.Exit(1)
-	}
-}