internal/imports: cache things outside the mod cache

Since a user's module cache is generally going to be much bigger than
their main module, one would expect that caching just information about
the module cache would be sufficient. It turns out that's not correct.
When we discover something in the module cache, we have to make sure
that a different version of it isn't already in scope. Doing that can
require information about the main module or replace targets, so that
needs to be cached too.

Concretely, when I'm working in x/tools, if a scan discovers a version
of x/tools in the module cache, it should usually ignore that version.
But that might not be true in more complicated cases, especially those
involving nested modules whose boundaries change.

So, cache everything except GOROOT. Since the new data is mutable,
we store it separately from the module cache data so that it can be
discarded easily between runs.

Change-Id: I47364f6c0270fee03af8898fec6c85d1b9c8d780
Reviewed-on: https://go-review.googlesource.com/c/tools/+/202045
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
This commit is contained in:
Heschi Kreinick 2019-10-09 19:05:30 -04:00
parent eb46839a96
commit d2fffb4b84
7 changed files with 130 additions and 114 deletions

View File

@ -16,6 +16,7 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
"time"
"golang.org/x/tools/internal/fastwalk" "golang.org/x/tools/internal/fastwalk"
) )
@ -83,8 +84,9 @@ func walkDir(root Root, add func(Root, string), skip func(root Root, dir string)
} }
return return
} }
start := time.Now()
if opts.Debug { if opts.Debug {
log.Printf("scanning %s", root.Path) log.Printf("gopathwalk: scanning %s", root.Path)
} }
w := &walker{ w := &walker{
root: root, root: root,
@ -98,7 +100,7 @@ func walkDir(root Root, add func(Root, string), skip func(root Root, dir string)
} }
if opts.Debug { if opts.Debug {
log.Printf("scanned %s", root.Path) log.Printf("gopathwalk: scanned %s in %v", root.Path, time.Since(start))
} }
} }

View File

@ -8,6 +8,7 @@ import (
"flag" "flag"
"fmt" "fmt"
"go/build" "go/build"
"log"
"path/filepath" "path/filepath"
"reflect" "reflect"
"runtime" "runtime"
@ -1642,6 +1643,7 @@ func (c testConfig) test(t *testing.T, fn func(*goimportTest)) {
WorkingDir: exported.Config.Dir, WorkingDir: exported.Config.Dir,
ForceGoPackages: forceGoPackages, ForceGoPackages: forceGoPackages,
Debug: *testDebug, Debug: *testDebug,
Logf: log.Printf,
}, },
exported: exported, exported: exported,
} }

View File

@ -31,8 +31,9 @@ type ModuleResolver struct {
ModsByModPath []*ModuleJSON // All modules, ordered by # of path components in module Path... ModsByModPath []*ModuleJSON // All modules, ordered by # of path components in module Path...
ModsByDir []*ModuleJSON // ...or Dir. ModsByDir []*ModuleJSON // ...or Dir.
// moduleCacheInfo stores information about the module cache. // moduleCacheCache stores information about the module cache.
moduleCacheInfo *moduleCacheInfo moduleCacheCache *dirInfoCache
otherCache *dirInfoCache
} }
type ModuleJSON struct { type ModuleJSON struct {
@ -93,9 +94,14 @@ func (r *ModuleResolver) init() error {
return count(j) < count(i) // descending order return count(j) < count(i) // descending order
}) })
if r.moduleCacheInfo == nil { if r.moduleCacheCache == nil {
r.moduleCacheInfo = &moduleCacheInfo{ r.moduleCacheCache = &dirInfoCache{
modCacheDirInfo: make(map[string]*directoryPackageInfo), dirs: map[string]*directoryPackageInfo{},
}
}
if r.otherCache == nil {
r.otherCache = &dirInfoCache{
dirs: map[string]*directoryPackageInfo{},
} }
} }
@ -103,6 +109,20 @@ func (r *ModuleResolver) init() error {
return nil return nil
} }
func (r *ModuleResolver) ClearForNewScan() {
r.otherCache = &dirInfoCache{
dirs: map[string]*directoryPackageInfo{},
}
}
func (r *ModuleResolver) ClearForNewMod() {
env := r.env
*r = ModuleResolver{
env: env,
}
r.init()
}
// findPackage returns the module and directory that contains the package at // findPackage returns the module and directory that contains the package at
// the given import path, or returns nil, "" if no module is in scope. // the given import path, or returns nil, "" if no module is in scope.
func (r *ModuleResolver) findPackage(importPath string) (*ModuleJSON, string) { func (r *ModuleResolver) findPackage(importPath string) (*ModuleJSON, string) {
@ -118,11 +138,7 @@ func (r *ModuleResolver) findPackage(importPath string) (*ModuleJSON, string) {
continue continue
} }
if info, ok := r.moduleCacheInfo.Load(pkgDir); ok { if info, ok := r.cacheLoad(pkgDir); ok {
// This is slightly wrong: a directory doesn't have to have an
// importable package to count as a package for package-to-module
// resolution. package main or _test files should count but
// don't.
if loaded, err := info.reachedStatus(nameLoaded); loaded { if loaded, err := info.reachedStatus(nameLoaded); loaded {
if err != nil { if err != nil {
continue // No package in this dir. continue // No package in this dir.
@ -132,13 +148,21 @@ func (r *ModuleResolver) findPackage(importPath string) (*ModuleJSON, string) {
if scanned, err := info.reachedStatus(directoryScanned); scanned && err != nil { if scanned, err := info.reachedStatus(directoryScanned); scanned && err != nil {
continue // Dir is unreadable, etc. continue // Dir is unreadable, etc.
} }
// This is slightly wrong: a directory doesn't have to have an
// importable package to count as a package for package-to-module
// resolution. package main or _test files should count but
// don't.
// TODO(heschi): fix this.
if _, err := r.cachePackageName(pkgDir); err == nil {
return m, pkgDir
}
} }
// Not cached. Read the filesystem.
pkgFiles, err := ioutil.ReadDir(pkgDir) pkgFiles, err := ioutil.ReadDir(pkgDir)
if err != nil { if err != nil {
continue continue
} }
// A module only contains a package if it has buildable go // A module only contains a package if it has buildable go
// files in that directory. If not, it could be provided by an // files in that directory. If not, it could be provided by an
// outer module. See #29736. // outer module. See #29736.
@ -151,6 +175,42 @@ func (r *ModuleResolver) findPackage(importPath string) (*ModuleJSON, string) {
return nil, "" return nil, ""
} }
func (r *ModuleResolver) cacheLoad(dir string) (directoryPackageInfo, bool) {
if info, ok := r.moduleCacheCache.Load(dir); ok {
return info, ok
}
return r.otherCache.Load(dir)
}
func (r *ModuleResolver) cacheStore(info directoryPackageInfo) {
if info.rootType == gopathwalk.RootModuleCache {
r.moduleCacheCache.Store(info.dir, info)
} else {
r.otherCache.Store(info.dir, info)
}
}
func (r *ModuleResolver) cacheKeys() []string {
return append(r.moduleCacheCache.Keys(), r.otherCache.Keys()...)
}
// cachePackageName caches the package name for a dir already in the cache.
func (r *ModuleResolver) cachePackageName(dir string) (directoryPackageInfo, error) {
info, ok := r.cacheLoad(dir)
if !ok {
panic("cachePackageName on uncached dir " + dir)
}
loaded, err := info.reachedStatus(nameLoaded)
if loaded {
return info, err
}
info.packageName, info.err = packageDirToName(info.dir)
info.status = nameLoaded
r.cacheStore(info)
return info, info.err
}
// findModuleByDir returns the module that contains dir, or nil if no such // findModuleByDir returns the module that contains dir, or nil if no such
// module is in scope. // module is in scope.
func (r *ModuleResolver) findModuleByDir(dir string) *ModuleJSON { func (r *ModuleResolver) findModuleByDir(dir string) *ModuleJSON {
@ -269,22 +329,15 @@ func (r *ModuleResolver) scan(_ references, loadNames bool, exclude []gopathwalk
roots = filterRoots(roots, exclude) roots = filterRoots(roots, exclude)
var result []*pkg var result []*pkg
dupCheck := make(map[string]bool)
var mu sync.Mutex var mu sync.Mutex
// Packages in the module cache are immutable. If we have // We assume cached directories have not changed. We can skip them and their
// already seen this package on a previous scan of the module // children.
// cache, return that result.
skip := func(root gopathwalk.Root, dir string) bool { skip := func(root gopathwalk.Root, dir string) bool {
mu.Lock() mu.Lock()
defer mu.Unlock() defer mu.Unlock()
// If we have already processed this directory on this walk, skip it.
if _, dup := dupCheck[dir]; dup {
return true
}
// If we have saved this directory information, skip it. info, ok := r.cacheLoad(dir)
info, ok := r.moduleCacheInfo.Load(dir)
if !ok { if !ok {
return false return false
} }
@ -295,51 +348,19 @@ func (r *ModuleResolver) scan(_ references, loadNames bool, exclude []gopathwalk
return packageScanned return packageScanned
} }
// Add anything new to the cache. We'll process everything in it below.
add := func(root gopathwalk.Root, dir string) { add := func(root gopathwalk.Root, dir string) {
mu.Lock() mu.Lock()
defer mu.Unlock() defer mu.Unlock()
if _, dup := dupCheck[dir]; dup {
return
}
info, err := r.scanDirForPackage(root, dir) r.cacheStore(r.scanDirForPackage(root, dir))
if err != nil {
return
}
if root.Type == gopathwalk.RootModuleCache {
// Save this package information in the cache and return.
// Packages from the module cache are added after Walk.
r.moduleCacheInfo.Store(dir, info)
return
}
// Skip this package if there was an error loading package info.
if info.err != nil {
return
}
if loadNames {
info.packageName, info.err = packageDirToName(dir)
if info.err != nil {
return
}
}
// The rest of this function canonicalizes the packages using the results
// of initializing the resolver from 'go list -m'.
res, err := r.canonicalize(root.Type, info)
if err != nil {
return
}
result = append(result, res)
} }
gopathwalk.WalkSkip(roots, add, skip, gopathwalk.Options{Debug: r.env.Debug, ModulesEnabled: true}) gopathwalk.WalkSkip(roots, add, skip, gopathwalk.Options{Debug: r.env.Debug, ModulesEnabled: true})
// Add the packages from the modules in the mod cache that were skipped. // Everything we already had, and everything new, is now in the cache.
for _, dir := range r.moduleCacheInfo.Keys() { for _, dir := range r.cacheKeys() {
info, ok := r.moduleCacheInfo.Load(dir) info, ok := r.cacheLoad(dir)
if !ok { if !ok {
continue continue
} }
@ -351,21 +372,13 @@ func (r *ModuleResolver) scan(_ references, loadNames bool, exclude []gopathwalk
// If we want package names, make sure the cache has them. // If we want package names, make sure the cache has them.
if loadNames { if loadNames {
loaded, err := info.reachedStatus(nameLoaded) var err error
if loaded && err != nil { if info, err = r.cachePackageName(info.dir); err != nil {
continue continue
} }
if !loaded {
info.packageName, info.err = packageDirToName(info.dir)
info.status = nameLoaded
r.moduleCacheInfo.Store(info.dir, info)
if info.err != nil {
continue
}
}
} }
res, err := r.canonicalize(gopathwalk.RootModuleCache, info) res, err := r.canonicalize(info)
if err != nil { if err != nil {
continue continue
} }
@ -377,9 +390,9 @@ func (r *ModuleResolver) scan(_ references, loadNames bool, exclude []gopathwalk
// canonicalize gets the result of canonicalizing the packages using the results // canonicalize gets the result of canonicalizing the packages using the results
// of initializing the resolver from 'go list -m'. // of initializing the resolver from 'go list -m'.
func (r *ModuleResolver) canonicalize(rootType gopathwalk.RootType, info directoryPackageInfo) (*pkg, error) { func (r *ModuleResolver) canonicalize(info directoryPackageInfo) (*pkg, error) {
// Packages in GOROOT are already canonical, regardless of the std/cmd modules. // Packages in GOROOT are already canonical, regardless of the std/cmd modules.
if rootType == gopathwalk.RootGOROOT { if info.rootType == gopathwalk.RootGOROOT {
return &pkg{ return &pkg{
importPathShort: info.nonCanonicalImportPath, importPathShort: info.nonCanonicalImportPath,
dir: info.dir, dir: info.dir,
@ -422,7 +435,7 @@ func (r *ModuleResolver) loadExports(ctx context.Context, expectPackage string,
return loadExportsFromFiles(ctx, r.env, expectPackage, pkg.dir) return loadExportsFromFiles(ctx, r.env, expectPackage, pkg.dir)
} }
func (r *ModuleResolver) scanDirForPackage(root gopathwalk.Root, dir string) (directoryPackageInfo, error) { func (r *ModuleResolver) scanDirForPackage(root gopathwalk.Root, dir string) directoryPackageInfo {
subdir := "" subdir := ""
if dir != root.Path { if dir != root.Path {
subdir = dir[len(root.Path)+len("/"):] subdir = dir[len(root.Path)+len("/"):]
@ -434,7 +447,10 @@ func (r *ModuleResolver) scanDirForPackage(root gopathwalk.Root, dir string) (di
// is a mess. There's no easy way to tell if it's on. // is a mess. There's no easy way to tell if it's on.
// We can still find things in the mod cache and // We can still find things in the mod cache and
// map them into /vendor when -mod=vendor is on. // map them into /vendor when -mod=vendor is on.
return directoryPackageInfo{}, fmt.Errorf("vendor directory") return directoryPackageInfo{
status: directoryScanned,
err: fmt.Errorf("vendor directory"),
}
} }
switch root.Type { switch root.Type {
case gopathwalk.RootCurrentModule: case gopathwalk.RootCurrentModule:
@ -445,7 +461,7 @@ func (r *ModuleResolver) scanDirForPackage(root gopathwalk.Root, dir string) (di
return directoryPackageInfo{ return directoryPackageInfo{
status: directoryScanned, status: directoryScanned,
err: fmt.Errorf("invalid module cache path: %v", subdir), err: fmt.Errorf("invalid module cache path: %v", subdir),
}, nil }
} }
modPath, err := module.DecodePath(filepath.ToSlash(matches[1])) modPath, err := module.DecodePath(filepath.ToSlash(matches[1]))
if err != nil { if err != nil {
@ -455,7 +471,7 @@ func (r *ModuleResolver) scanDirForPackage(root gopathwalk.Root, dir string) (di
return directoryPackageInfo{ return directoryPackageInfo{
status: directoryScanned, status: directoryScanned,
err: fmt.Errorf("decoding module cache path %q: %v", subdir, err), err: fmt.Errorf("decoding module cache path %q: %v", subdir, err),
}, nil }
} }
importPath = path.Join(modPath, filepath.ToSlash(matches[3])) importPath = path.Join(modPath, filepath.ToSlash(matches[3]))
case gopathwalk.RootGOROOT: case gopathwalk.RootGOROOT:
@ -465,12 +481,13 @@ func (r *ModuleResolver) scanDirForPackage(root gopathwalk.Root, dir string) (di
result := directoryPackageInfo{ result := directoryPackageInfo{
status: directoryScanned, status: directoryScanned,
dir: dir, dir: dir,
rootType: root.Type,
nonCanonicalImportPath: importPath, nonCanonicalImportPath: importPath,
needsReplace: false, needsReplace: false,
} }
if root.Type == gopathwalk.RootGOROOT { if root.Type == gopathwalk.RootGOROOT {
// stdlib packages are always in scope, despite the confusing go.mod // stdlib packages are always in scope, despite the confusing go.mod
return result, nil return result
} }
// Check that this package is not obviously impossible to import. // Check that this package is not obviously impossible to import.
modFile := r.findModFile(dir) modFile := r.findModFile(dir)
@ -483,7 +500,7 @@ func (r *ModuleResolver) scanDirForPackage(root gopathwalk.Root, dir string) (di
result.needsReplace = true result.needsReplace = true
} }
return result, nil return result
} }
// modCacheRegexp splits a path in a module cache into module, module version, and package. // modCacheRegexp splits a path in a module cache into module, module version, and package.

View File

@ -2,11 +2,10 @@ package imports
import ( import (
"sync" "sync"
"golang.org/x/tools/internal/gopathwalk"
) )
// ModuleResolver implements Resolver for modules using the go command as little
// as feasible.
//
// To find packages to import, the resolver needs to know about all of the // To find packages to import, the resolver needs to know about all of the
// the packages that could be imported. This includes packages that are // the packages that could be imported. This includes packages that are
// already in modules that are in (1) the current module, (2) replace targets, // already in modules that are in (1) the current module, (2) replace targets,
@ -42,12 +41,13 @@ type directoryPackageInfo struct {
// Set when status >= directoryScanned. // Set when status >= directoryScanned.
// dir is the absolute directory of this package. // dir is the absolute directory of this package.
dir string dir string
// nonCanonicalImportPath is the expected import path for this package. rootType gopathwalk.RootType
// This may not be an import path that can be used to import this package. // nonCanonicalImportPath is the package's expected import path. It may
// not actually be importable at that path.
nonCanonicalImportPath string nonCanonicalImportPath string
// needsReplace is true if the nonCanonicalImportPath does not match the // needsReplace is true if the nonCanonicalImportPath does not match the
// the modules declared path, making it impossible to import without a // module's declared path, making it impossible to import without a
// replace directive. // replace directive.
needsReplace bool needsReplace bool
@ -68,8 +68,8 @@ func (info *directoryPackageInfo) reachedStatus(target directoryPackageStatus) (
return true, nil return true, nil
} }
// moduleCacheInfo is a concurrency safe map for storing information about // dirInfoCache is a concurrency safe map for storing information about
// the directories in the module cache. // directories that may contain packages.
// //
// The information in this cache is built incrementally. Entries are initialized in scan. // The information in this cache is built incrementally. Entries are initialized in scan.
// No new keys should be added in any other functions, as all directories containing // No new keys should be added in any other functions, as all directories containing
@ -78,32 +78,30 @@ func (info *directoryPackageInfo) reachedStatus(target directoryPackageStatus) (
// Other functions, including loadExports and findPackage, may update entries in this cache // Other functions, including loadExports and findPackage, may update entries in this cache
// as they discover new things about the directory. // as they discover new things about the directory.
// //
// We do not need to protect the data in the cache for multiple writes, because it only stores // The information in the cache is not expected to change for the cache's
// module cache directories, which do not change. If two competing stores take place, there will be // lifetime, so there is no protection against competing writes. Users should
// one store that wins. Although this could result in a loss of information it will not be incorrect // take care not to hold the cache across changes to the underlying files.
// and may just result in recomputing the same result later.
// //
// TODO(suzmue): consider other concurrency strategies and data structures (RWLocks, sync.Map, etc) // TODO(suzmue): consider other concurrency strategies and data structures (RWLocks, sync.Map, etc)
type moduleCacheInfo struct { type dirInfoCache struct {
mu sync.Mutex mu sync.Mutex
// modCacheDirInfo stores information about packages in // dirs stores information about packages in directories, keyed by absolute path.
// module cache directories. Keyed by absolute directory. dirs map[string]*directoryPackageInfo
modCacheDirInfo map[string]*directoryPackageInfo
} }
// Store stores the package info for dir. // Store stores the package info for dir.
func (d *moduleCacheInfo) Store(dir string, info directoryPackageInfo) { func (d *dirInfoCache) Store(dir string, info directoryPackageInfo) {
d.mu.Lock() d.mu.Lock()
defer d.mu.Unlock() defer d.mu.Unlock()
stored := info // defensive copy stored := info // defensive copy
d.modCacheDirInfo[dir] = &stored d.dirs[dir] = &stored
} }
// Load returns a copy of the directoryPackageInfo for absolute directory dir. // Load returns a copy of the directoryPackageInfo for absolute directory dir.
func (d *moduleCacheInfo) Load(dir string) (directoryPackageInfo, bool) { func (d *dirInfoCache) Load(dir string) (directoryPackageInfo, bool) {
d.mu.Lock() d.mu.Lock()
defer d.mu.Unlock() defer d.mu.Unlock()
info, ok := d.modCacheDirInfo[dir] info, ok := d.dirs[dir]
if !ok { if !ok {
return directoryPackageInfo{}, false return directoryPackageInfo{}, false
} }
@ -111,10 +109,10 @@ func (d *moduleCacheInfo) Load(dir string) (directoryPackageInfo, bool) {
} }
// Keys returns the keys currently present in d. // Keys returns the keys currently present in d.
func (d *moduleCacheInfo) Keys() (keys []string) { func (d *dirInfoCache) Keys() (keys []string) {
d.mu.Lock() d.mu.Lock()
defer d.mu.Unlock() defer d.mu.Unlock()
for key := range d.modCacheDirInfo { for key := range d.dirs {
keys = append(keys, key) keys = append(keys, key)
} }
return keys return keys

View File

@ -53,8 +53,8 @@ func TestDirectoryPackageInfoReachedStatus(t *testing.T) {
} }
func TestModCacheInfo(t *testing.T) { func TestModCacheInfo(t *testing.T) {
m := &moduleCacheInfo{ m := &dirInfoCache{
modCacheDirInfo: make(map[string]*directoryPackageInfo), dirs: make(map[string]*directoryPackageInfo),
} }
dirInfo := []struct { dirInfo := []struct {

View File

@ -807,10 +807,11 @@ func BenchmarkScanModCache(b *testing.B) {
GOROOT: build.Default.GOROOT, GOROOT: build.Default.GOROOT,
Logf: log.Printf, Logf: log.Printf,
} }
exclude := []gopathwalk.RootType{gopathwalk.RootCurrentModule, gopathwalk.RootGOROOT, gopathwalk.RootOther} exclude := []gopathwalk.RootType{gopathwalk.RootGOROOT}
env.GetResolver().scan(nil, true, exclude) env.GetResolver().scan(nil, true, exclude)
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
env.GetResolver().scan(nil, true, exclude) env.GetResolver().scan(nil, true, exclude)
env.GetResolver().(*ModuleResolver).ClearForNewScan()
} }
} }

View File

@ -141,15 +141,11 @@ func (v *view) RunProcessEnvFunc(ctx context.Context, fn func(*imports.Options)
} }
// Before running the user provided function, clear caches in the resolver. // Before running the user provided function, clear caches in the resolver.
if v.modFilesChanged() { if r, ok := v.processEnv.GetResolver().(*imports.ModuleResolver); ok {
if r, ok := v.processEnv.GetResolver().(*imports.ModuleResolver); ok { if v.modFilesChanged() {
// Clear the resolver cache and set Initialized to false. r.ClearForNewMod()
r.Initialized = false } else {
r.Main = nil r.ClearForNewScan()
r.ModsByModPath = nil
r.ModsByDir = nil
// Reset the modFileVersions.
v.modFileVersions = nil
} }
} }