Home | History | Annotate | Download | only in finder
      1 // Copyright 2017 Google Inc. All rights reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 package finder
     16 
     17 import (
     18 	"bufio"
     19 	"bytes"
     20 	"encoding/json"
     21 	"errors"
     22 	"fmt"
     23 	"io"
     24 	"os"
     25 	"path/filepath"
     26 	"runtime"
     27 	"sort"
     28 	"strings"
     29 	"sync"
     30 	"sync/atomic"
     31 	"time"
     32 
     33 	"android/soong/finder/fs"
     34 )
     35 
     36 // This file provides a Finder struct that can quickly search for files satisfying
     37 // certain criteria.
     38 // This Finder gets its speed partially from parallelism and partially from caching.
     39 // If a Stat call returns the same result as last time, then it means Finder
     40 // can skip the ReadDir call for that dir.
     41 
     42 // The primary data structure used by the finder is the field Finder.nodes ,
     43 // which is a tree of nodes of type *pathMap .
     44 // Each node represents a directory on disk, along with its stats, subdirectories,
     45 // and contained files.
     46 
     47 // The common use case for the Finder is that the caller creates a Finder and gives
     48 // it the same query that was given to it in the previous execution.
     49 // In this situation, the major events that take place are:
     50 // 1. The Finder begins to load its db
     51 // 2. The Finder begins to stat the directories mentioned in its db (using multiple threads)
     52 //    Calling Stat on each of these directories is generally a large fraction of the total time
     53 // 3. The Finder begins to construct a separate tree of nodes in each of its threads
     54 // 4. The Finder merges the individual node trees into the main node tree
     55 // 5. The Finder may call ReadDir a few times if there are a few directories that are out-of-date
     56 //    These ReadDir calls might prompt additional Stat calls, etc
     57 // 6. The Finder waits for all loading to complete
     58 // 7. The Finder searches the cache for files matching the user's query (using multiple threads)
     59 
     60 // These are the invariants regarding concurrency:
     61 // 1. The public methods of Finder are threadsafe.
     62 //      The public methods are only performance-optimized for one caller at a time, however.
     63 //      For the moment, multiple concurrent callers shouldn't expect any better performance than
     64 //      multiple serial callers.
     65 // 2. While building the node tree, only one thread may ever access the <children> collection of a
     66 //    *pathMap at once.
     67 //    a) The thread that accesses the <children> collection is the thread that discovers the
     68 //       children (by reading them from the cache or by having received a response to ReadDir).
     69 //       1) Consequently, the thread that discovers the children also spawns requests to stat
     70 //          subdirs.
     71 //    b) Consequently, while building the node tree, no thread may do a lookup of its
     72 //       *pathMap via filepath because another thread may be adding children to the
     73 //       <children> collection of an ancestor node. Additionally, in rare cases, another thread
     74 //       may be removing children from an ancestor node if the children were only discovered to
     75 //       be irrelevant after calling ReadDir (which happens if a prune-file was just added).
     76 // 3. No query will begin to be serviced until all loading (both reading the db
     77 //    and scanning the filesystem) is complete.
     78 //    Tests indicate that it only takes about 10% as long to search the in-memory cache as to
     79 //    generate it, making this not a huge loss in performance.
     80 // 4. The parsing of the db and the initial setup of the pathMap tree must complete before
     81 //      beginning to call listDirSync (because listDirSync can create new entries in the pathMap)
     82 
     83 // see cmd/finder.go or finder_test.go for usage examples
     84 
     85 // Update versionString whenever making a backwards-incompatible change to the cache file format
     86 const versionString = "Android finder version 1"
     87 
     88 // a CacheParams specifies which files and directories the user wishes be scanned and
     89 // potentially added to the cache
     90 type CacheParams struct {
     91 	// WorkingDirectory is used as a base for any relative file paths given to the Finder
     92 	WorkingDirectory string
     93 
     94 	// RootDirs are the root directories used to initiate the search
     95 	RootDirs []string
     96 
     97 	// ExcludeDirs are directory names that if encountered are removed from the search
     98 	ExcludeDirs []string
     99 
    100 	// PruneFiles are file names that if encountered prune their entire directory
    101 	// (including siblings)
    102 	PruneFiles []string
    103 
    104 	// IncludeFiles are file names to include as matches
    105 	IncludeFiles []string
    106 }
    107 
    108 // a cacheConfig stores the inputs that determine what should be included in the cache
    109 type cacheConfig struct {
    110 	CacheParams
    111 
    112 	// FilesystemView is a unique identifier telling which parts of which file systems
    113 	// are readable by the Finder. In practice its value is essentially username@hostname.
    114 	// FilesystemView is set to ensure that a cache file copied to another host or
    115 	// found by another user doesn't inadvertently get reused.
    116 	FilesystemView string
    117 }
    118 
    119 func (p *cacheConfig) Dump() ([]byte, error) {
    120 	bytes, err := json.Marshal(p)
    121 	return bytes, err
    122 }
    123 
    124 // a cacheMetadata stores version information about the cache
    125 type cacheMetadata struct {
    126 	// The Version enables the Finder to determine whether it can even parse the file
    127 	// If the version changes, the entire cache file must be regenerated
    128 	Version string
    129 
    130 	// The CacheParams enables the Finder to determine whether the parameters match
    131 	// If the CacheParams change, the Finder can choose how much of the cache file to reuse
    132 	// (although in practice, the Finder will probably choose to ignore the entire file anyway)
    133 	Config cacheConfig
    134 }
    135 
    136 type Logger interface {
    137 	Output(calldepth int, s string) error
    138 }
    139 
    140 // the Finder is the main struct that callers will want to use
    141 type Finder struct {
    142 	// configuration
    143 	DbPath              string
    144 	numDbLoadingThreads int
    145 	numSearchingThreads int
    146 	cacheMetadata       cacheMetadata
    147 	logger              Logger
    148 	filesystem          fs.FileSystem
    149 
    150 	// temporary state
    151 	threadPool        *threadPool
    152 	mutex             sync.Mutex
    153 	fsErrs            []fsErr
    154 	errlock           sync.Mutex
    155 	shutdownWaitgroup sync.WaitGroup
    156 
    157 	// non-temporary state
    158 	modifiedFlag int32
    159 	nodes        pathMap
    160 }
    161 
    162 var defaultNumThreads = runtime.NumCPU() * 2
    163 
    164 // New creates a new Finder for use
    165 func New(cacheParams CacheParams, filesystem fs.FileSystem,
    166 	logger Logger, dbPath string) (f *Finder, err error) {
    167 	return newImpl(cacheParams, filesystem, logger, dbPath, defaultNumThreads)
    168 }
    169 
    170 // newImpl is like New but accepts more params
    171 func newImpl(cacheParams CacheParams, filesystem fs.FileSystem,
    172 	logger Logger, dbPath string, numThreads int) (f *Finder, err error) {
    173 	numDbLoadingThreads := numThreads
    174 	numSearchingThreads := numThreads
    175 
    176 	metadata := cacheMetadata{
    177 		Version: versionString,
    178 		Config: cacheConfig{
    179 			CacheParams:    cacheParams,
    180 			FilesystemView: filesystem.ViewId(),
    181 		},
    182 	}
    183 
    184 	f = &Finder{
    185 		numDbLoadingThreads: numDbLoadingThreads,
    186 		numSearchingThreads: numSearchingThreads,
    187 		cacheMetadata:       metadata,
    188 		logger:              logger,
    189 		filesystem:          filesystem,
    190 
    191 		nodes:  *newPathMap("/"),
    192 		DbPath: dbPath,
    193 
    194 		shutdownWaitgroup: sync.WaitGroup{},
    195 	}
    196 
    197 	f.loadFromFilesystem()
    198 
    199 	// check for any filesystem errors
    200 	err = f.getErr()
    201 	if err != nil {
    202 		return nil, err
    203 	}
    204 
    205 	// confirm that every path mentioned in the CacheConfig exists
    206 	for _, path := range cacheParams.RootDirs {
    207 		if !filepath.IsAbs(path) {
    208 			path = filepath.Join(f.cacheMetadata.Config.WorkingDirectory, path)
    209 		}
    210 		node := f.nodes.GetNode(filepath.Clean(path), false)
    211 		if node == nil || node.ModTime == 0 {
    212 			return nil, fmt.Errorf("path %v was specified to be included in the cache but does not exist\n", path)
    213 		}
    214 	}
    215 
    216 	return f, nil
    217 }
    218 
    219 // FindNamed searches for every cached file
    220 func (f *Finder) FindAll() []string {
    221 	return f.FindAt("/")
    222 }
    223 
    224 // FindNamed searches for every cached file under <rootDir>
    225 func (f *Finder) FindAt(rootDir string) []string {
    226 	filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
    227 		return entries.DirNames, entries.FileNames
    228 	}
    229 	return f.FindMatching(rootDir, filter)
    230 }
    231 
    232 // FindNamed searches for every cached file named <fileName>
    233 func (f *Finder) FindNamed(fileName string) []string {
    234 	return f.FindNamedAt("/", fileName)
    235 }
    236 
    237 // FindNamedAt searches under <rootPath> for every file named <fileName>
    238 // The reason a caller might use FindNamedAt instead of FindNamed is if they want
    239 // to limit their search to a subset of the cache
    240 func (f *Finder) FindNamedAt(rootPath string, fileName string) []string {
    241 	filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
    242 		matches := []string{}
    243 		for _, foundName := range entries.FileNames {
    244 			if foundName == fileName {
    245 				matches = append(matches, foundName)
    246 			}
    247 		}
    248 		return entries.DirNames, matches
    249 
    250 	}
    251 	return f.FindMatching(rootPath, filter)
    252 }
    253 
    254 // FindFirstNamed searches for every file named <fileName>
    255 // Whenever it finds a match, it stops search subdirectories
    256 func (f *Finder) FindFirstNamed(fileName string) []string {
    257 	return f.FindFirstNamedAt("/", fileName)
    258 }
    259 
    260 // FindFirstNamedAt searches for every file named <fileName>
    261 // Whenever it finds a match, it stops search subdirectories
    262 func (f *Finder) FindFirstNamedAt(rootPath string, fileName string) []string {
    263 	filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
    264 		matches := []string{}
    265 		for _, foundName := range entries.FileNames {
    266 			if foundName == fileName {
    267 				matches = append(matches, foundName)
    268 			}
    269 		}
    270 
    271 		if len(matches) > 0 {
    272 			return []string{}, matches
    273 		}
    274 		return entries.DirNames, matches
    275 	}
    276 	return f.FindMatching(rootPath, filter)
    277 }
    278 
    279 // FindMatching is the most general exported function for searching for files in the cache
    280 // The WalkFunc will be invoked repeatedly and is expected to modify the provided DirEntries
    281 // in place, removing file paths and directories as desired.
    282 // WalkFunc will be invoked potentially many times in parallel, and must be threadsafe.
    283 func (f *Finder) FindMatching(rootPath string, filter WalkFunc) []string {
    284 	// set up some parameters
    285 	scanStart := time.Now()
    286 	var isRel bool
    287 	workingDir := f.cacheMetadata.Config.WorkingDirectory
    288 
    289 	isRel = !filepath.IsAbs(rootPath)
    290 	if isRel {
    291 		rootPath = filepath.Join(workingDir, rootPath)
    292 	}
    293 
    294 	rootPath = filepath.Clean(rootPath)
    295 
    296 	// ensure nothing else is using the Finder
    297 	f.verbosef("FindMatching waiting for finder to be idle\n")
    298 	f.lock()
    299 	defer f.unlock()
    300 
    301 	node := f.nodes.GetNode(rootPath, false)
    302 	if node == nil {
    303 		f.verbosef("No data for path %v ; apparently not included in cache params: %v\n",
    304 			rootPath, f.cacheMetadata.Config.CacheParams)
    305 		// path is not found; don't do a search
    306 		return []string{}
    307 	}
    308 
    309 	// search for matching files
    310 	f.verbosef("Finder finding %v using cache\n", rootPath)
    311 	results := f.findInCacheMultithreaded(node, filter, f.numSearchingThreads)
    312 
    313 	// format and return results
    314 	if isRel {
    315 		for i := 0; i < len(results); i++ {
    316 			results[i] = strings.Replace(results[i], workingDir+"/", "", 1)
    317 		}
    318 	}
    319 	sort.Strings(results)
    320 	f.verbosef("Found %v files under %v in %v using cache\n",
    321 		len(results), rootPath, time.Since(scanStart))
    322 	return results
    323 }
    324 
    325 // Shutdown declares that the finder is no longer needed and waits for its cleanup to complete
    326 // Currently, that only entails waiting for the database dump to complete.
    327 func (f *Finder) Shutdown() {
    328 	f.waitForDbDump()
    329 }
    330 
    331 // End of public api
    332 
    333 func (f *Finder) goDumpDb() {
    334 	if f.wasModified() {
    335 		f.shutdownWaitgroup.Add(1)
    336 		go func() {
    337 			err := f.dumpDb()
    338 			if err != nil {
    339 				f.verbosef("%v\n", err)
    340 			}
    341 			f.shutdownWaitgroup.Done()
    342 		}()
    343 	} else {
    344 		f.verbosef("Skipping dumping unmodified db\n")
    345 	}
    346 }
    347 
    348 func (f *Finder) waitForDbDump() {
    349 	f.shutdownWaitgroup.Wait()
    350 }
    351 
    352 // joinCleanPaths is like filepath.Join but is faster because
    353 // joinCleanPaths doesn't have to support paths ending in "/" or containing ".."
    354 func joinCleanPaths(base string, leaf string) string {
    355 	if base == "" {
    356 		return leaf
    357 	}
    358 	if base == "/" {
    359 		return base + leaf
    360 	}
    361 	if leaf == "" {
    362 		return base
    363 	}
    364 	return base + "/" + leaf
    365 }
    366 
    367 func (f *Finder) verbosef(format string, args ...interface{}) {
    368 	f.logger.Output(2, fmt.Sprintf(format, args...))
    369 }
    370 
    371 // loadFromFilesystem populates the in-memory cache based on the contents of the filesystem
    372 func (f *Finder) loadFromFilesystem() {
    373 	f.threadPool = newThreadPool(f.numDbLoadingThreads)
    374 
    375 	err := f.startFromExternalCache()
    376 	if err != nil {
    377 		f.startWithoutExternalCache()
    378 	}
    379 
    380 	f.goDumpDb()
    381 
    382 	f.threadPool = nil
    383 }
    384 
    385 func (f *Finder) startFind(path string) {
    386 	if !filepath.IsAbs(path) {
    387 		path = filepath.Join(f.cacheMetadata.Config.WorkingDirectory, path)
    388 	}
    389 	node := f.nodes.GetNode(path, true)
    390 	f.statDirAsync(node)
    391 }
    392 
    393 func (f *Finder) lock() {
    394 	f.mutex.Lock()
    395 }
    396 
    397 func (f *Finder) unlock() {
    398 	f.mutex.Unlock()
    399 }
    400 
    401 // a statResponse is the relevant portion of the response from the filesystem to a Stat call
    402 type statResponse struct {
    403 	ModTime int64
    404 	Inode   uint64
    405 	Device  uint64
    406 }
    407 
    408 // a pathAndStats stores a path and its stats
    409 type pathAndStats struct {
    410 	statResponse
    411 
    412 	Path string
    413 }
    414 
    415 // a dirFullInfo stores all of the relevant information we know about a directory
    416 type dirFullInfo struct {
    417 	pathAndStats
    418 
    419 	FileNames []string
    420 }
    421 
    422 // a PersistedDirInfo is the information about a dir that we save to our cache on disk
    423 type PersistedDirInfo struct {
    424 	// These field names are short because they are repeated many times in the output json file
    425 	P string   // path
    426 	T int64    // modification time
    427 	I uint64   // inode number
    428 	F []string // relevant filenames contained
    429 }
    430 
    431 // a PersistedDirs is the information that we persist for a group of dirs
    432 type PersistedDirs struct {
    433 	// the device on which each directory is stored
    434 	Device uint64
    435 	// the common root path to which all contained dirs are relative
    436 	Root string
    437 	// the directories themselves
    438 	Dirs []PersistedDirInfo
    439 }
    440 
    441 // a CacheEntry is the smallest unit that can be read and parsed from the cache (on disk) at a time
    442 type CacheEntry []PersistedDirs
    443 
    444 // a DirEntries lists the files and directories contained directly within a specific directory
    445 type DirEntries struct {
    446 	Path string
    447 
    448 	// elements of DirNames are just the dir names; they don't include any '/' character
    449 	DirNames []string
    450 	// elements of FileNames are just the file names; they don't include '/' character
    451 	FileNames []string
    452 }
    453 
    454 // a WalkFunc is the type that is passed into various Find functions for determining which
    455 // directories the caller wishes be walked. The WalkFunc is expected to decide which
    456 // directories to walk and which files to consider as matches to the original query.
    457 type WalkFunc func(DirEntries) (dirs []string, files []string)
    458 
    459 // a mapNode stores the relevant stats about a directory to be stored in a pathMap
    460 type mapNode struct {
    461 	statResponse
    462 	FileNames []string
    463 }
    464 
    465 // a pathMap implements the directory tree structure of nodes
    466 type pathMap struct {
    467 	mapNode
    468 
    469 	path string
    470 
    471 	children map[string]*pathMap
    472 
    473 	// number of descendent nodes, including self
    474 	approximateNumDescendents int
    475 }
    476 
    477 func newPathMap(path string) *pathMap {
    478 	result := &pathMap{path: path, children: make(map[string]*pathMap, 4),
    479 		approximateNumDescendents: 1}
    480 	return result
    481 }
    482 
    483 // GetNode returns the node at <path>
    484 func (m *pathMap) GetNode(path string, createIfNotFound bool) *pathMap {
    485 	if len(path) > 0 && path[0] == '/' {
    486 		path = path[1:]
    487 	}
    488 
    489 	node := m
    490 	for {
    491 		if path == "" {
    492 			return node
    493 		}
    494 
    495 		index := strings.Index(path, "/")
    496 		var firstComponent string
    497 		if index >= 0 {
    498 			firstComponent = path[:index]
    499 			path = path[index+1:]
    500 		} else {
    501 			firstComponent = path
    502 			path = ""
    503 		}
    504 
    505 		child, found := node.children[firstComponent]
    506 
    507 		if !found {
    508 			if createIfNotFound {
    509 				child = node.newChild(firstComponent)
    510 			} else {
    511 				return nil
    512 			}
    513 		}
    514 
    515 		node = child
    516 	}
    517 }
    518 
    519 func (m *pathMap) newChild(name string) (child *pathMap) {
    520 	path := joinCleanPaths(m.path, name)
    521 	newChild := newPathMap(path)
    522 	m.children[name] = newChild
    523 
    524 	return m.children[name]
    525 }
    526 
    527 func (m *pathMap) UpdateNumDescendents() int {
    528 	count := 1
    529 	for _, child := range m.children {
    530 		count += child.approximateNumDescendents
    531 	}
    532 	m.approximateNumDescendents = count
    533 	return count
    534 }
    535 
    536 func (m *pathMap) UpdateNumDescendentsRecursive() {
    537 	for _, child := range m.children {
    538 		child.UpdateNumDescendentsRecursive()
    539 	}
    540 	m.UpdateNumDescendents()
    541 }
    542 
    543 func (m *pathMap) MergeIn(other *pathMap) {
    544 	for key, theirs := range other.children {
    545 		ours, found := m.children[key]
    546 		if found {
    547 			ours.MergeIn(theirs)
    548 		} else {
    549 			m.children[key] = theirs
    550 		}
    551 	}
    552 	if other.ModTime != 0 {
    553 		m.mapNode = other.mapNode
    554 	}
    555 	m.UpdateNumDescendents()
    556 }
    557 
    558 func (m *pathMap) DumpAll() []dirFullInfo {
    559 	results := []dirFullInfo{}
    560 	m.dumpInto("", &results)
    561 	return results
    562 }
    563 
    564 func (m *pathMap) dumpInto(path string, results *[]dirFullInfo) {
    565 	*results = append(*results,
    566 		dirFullInfo{
    567 			pathAndStats{statResponse: m.statResponse, Path: path},
    568 			m.FileNames},
    569 	)
    570 	for key, child := range m.children {
    571 		childPath := joinCleanPaths(path, key)
    572 		if len(childPath) == 0 || childPath[0] != '/' {
    573 			childPath = "/" + childPath
    574 		}
    575 		child.dumpInto(childPath, results)
    576 	}
    577 }
    578 
    579 // a semaphore can be locked by up to <capacity> callers at once
    580 type semaphore struct {
    581 	pool chan bool
    582 }
    583 
    584 func newSemaphore(capacity int) *semaphore {
    585 	return &semaphore{pool: make(chan bool, capacity)}
    586 }
    587 
    588 func (l *semaphore) Lock() {
    589 	l.pool <- true
    590 }
    591 
    592 func (l *semaphore) Unlock() {
    593 	<-l.pool
    594 }
    595 
    596 // A threadPool runs goroutines and supports throttling and waiting.
    597 // Without throttling, Go may exhaust the maximum number of various resources, such as
    598 // threads or file descriptors, and crash the program.
    599 type threadPool struct {
    600 	receivedRequests sync.WaitGroup
    601 	activeRequests   semaphore
    602 }
    603 
    604 func newThreadPool(maxNumConcurrentThreads int) *threadPool {
    605 	return &threadPool{
    606 		receivedRequests: sync.WaitGroup{},
    607 		activeRequests:   *newSemaphore(maxNumConcurrentThreads),
    608 	}
    609 }
    610 
    611 // Run requests to run the given function in its own goroutine
    612 func (p *threadPool) Run(function func()) {
    613 	p.receivedRequests.Add(1)
    614 	// If Run() was called from within a goroutine spawned by this threadPool,
    615 	// then we may need to return from Run() before having capacity to actually
    616 	// run <function>.
    617 	//
    618 	// It's possible that the body of <function> contains a statement (such as a syscall)
    619 	// that will cause Go to pin it to a thread, or will contain a statement that uses
    620 	// another resource that is in short supply (such as a file descriptor), so we can't
    621 	// actually run <function> until we have capacity.
    622 	//
    623 	// However, the semaphore used for synchronization is implemented via a channel and
    624 	// shouldn't require a new thread for each access.
    625 	go func() {
    626 		p.activeRequests.Lock()
    627 		function()
    628 		p.activeRequests.Unlock()
    629 		p.receivedRequests.Done()
    630 	}()
    631 }
    632 
    633 // Wait waits until all goroutines are done, just like sync.WaitGroup's Wait
    634 func (p *threadPool) Wait() {
    635 	p.receivedRequests.Wait()
    636 }
    637 
    638 type fsErr struct {
    639 	path string
    640 	err  error
    641 }
    642 
    643 func (e fsErr) String() string {
    644 	return e.path + ": " + e.err.Error()
    645 }
    646 
    647 func (f *Finder) serializeCacheEntry(dirInfos []dirFullInfo) ([]byte, error) {
    648 	// group each dirFullInfo by its Device, to avoid having to repeat it in the output
    649 	dirsByDevice := map[uint64][]PersistedDirInfo{}
    650 	for _, entry := range dirInfos {
    651 		_, found := dirsByDevice[entry.Device]
    652 		if !found {
    653 			dirsByDevice[entry.Device] = []PersistedDirInfo{}
    654 		}
    655 		dirsByDevice[entry.Device] = append(dirsByDevice[entry.Device],
    656 			PersistedDirInfo{P: entry.Path, T: entry.ModTime, I: entry.Inode, F: entry.FileNames})
    657 	}
    658 
    659 	cacheEntry := CacheEntry{}
    660 
    661 	for device, infos := range dirsByDevice {
    662 		// find common prefix
    663 		prefix := ""
    664 		if len(infos) > 0 {
    665 			prefix = infos[0].P
    666 		}
    667 		for _, info := range infos {
    668 			for !strings.HasPrefix(info.P+"/", prefix+"/") {
    669 				prefix = filepath.Dir(prefix)
    670 				if prefix == "/" {
    671 					break
    672 				}
    673 			}
    674 		}
    675 		// remove common prefix
    676 		for i := range infos {
    677 			suffix := strings.Replace(infos[i].P, prefix, "", 1)
    678 			if len(suffix) > 0 && suffix[0] == '/' {
    679 				suffix = suffix[1:]
    680 			}
    681 			infos[i].P = suffix
    682 		}
    683 
    684 		// turn the map (keyed by device) into a list of structs with labeled fields
    685 		// this is to improve readability of the output
    686 		cacheEntry = append(cacheEntry, PersistedDirs{Device: device, Root: prefix, Dirs: infos})
    687 	}
    688 
    689 	// convert to json.
    690 	// it would save some space to use a different format than json for the db file,
    691 	// but the space and time savings are small, and json is easy for humans to read
    692 	bytes, err := json.Marshal(cacheEntry)
    693 	return bytes, err
    694 }
    695 
    696 func (f *Finder) parseCacheEntry(bytes []byte) ([]dirFullInfo, error) {
    697 	var cacheEntry CacheEntry
    698 	err := json.Unmarshal(bytes, &cacheEntry)
    699 	if err != nil {
    700 		return nil, err
    701 	}
    702 
    703 	// convert from a CacheEntry to a []dirFullInfo (by copying a few fields)
    704 	capacity := 0
    705 	for _, element := range cacheEntry {
    706 		capacity += len(element.Dirs)
    707 	}
    708 	nodes := make([]dirFullInfo, capacity)
    709 	count := 0
    710 	for _, element := range cacheEntry {
    711 		for _, dir := range element.Dirs {
    712 			path := joinCleanPaths(element.Root, dir.P)
    713 
    714 			nodes[count] = dirFullInfo{
    715 				pathAndStats: pathAndStats{
    716 					statResponse: statResponse{
    717 						ModTime: dir.T, Inode: dir.I, Device: element.Device,
    718 					},
    719 					Path: path},
    720 				FileNames: dir.F}
    721 			count++
    722 		}
    723 	}
    724 	return nodes, nil
    725 }
    726 
    727 // We use the following separator byte to distinguish individually parseable blocks of json
    728 // because we know this separator won't appear in the json that we're parsing.
    729 //
    730 // The newline byte can only appear in a UTF-8 stream if the newline character appears, because:
    731 // - The newline character is encoded as "0000 1010" in binary ("0a" in hex)
    732 // - UTF-8 dictates that bytes beginning with a "0" bit are never emitted as part of a multibyte
    733 //   character.
    734 //
    735 // We know that the newline character will never appear in our json string, because:
    736 // - If a newline character appears as part of a data string, then json encoding will
    737 //   emit two characters instead: '\' and 'n'.
    738 // - The json encoder that we use doesn't emit the optional newlines between any of its
    739 //   other outputs.
    740 const lineSeparator = byte('\n')
    741 
    742 func (f *Finder) readLine(reader *bufio.Reader) ([]byte, error) {
    743 	return reader.ReadBytes(lineSeparator)
    744 }
    745 
    746 // validateCacheHeader reads the cache header from cacheReader and tells whether the cache is compatible with this Finder
    747 func (f *Finder) validateCacheHeader(cacheReader *bufio.Reader) bool {
    748 	cacheVersionBytes, err := f.readLine(cacheReader)
    749 	if err != nil {
    750 		f.verbosef("Failed to read database header; database is invalid\n")
    751 		return false
    752 	}
    753 	if len(cacheVersionBytes) > 0 && cacheVersionBytes[len(cacheVersionBytes)-1] == lineSeparator {
    754 		cacheVersionBytes = cacheVersionBytes[:len(cacheVersionBytes)-1]
    755 	}
    756 	cacheVersionString := string(cacheVersionBytes)
    757 	currentVersion := f.cacheMetadata.Version
    758 	if cacheVersionString != currentVersion {
    759 		f.verbosef("Version changed from %q to %q, database is not applicable\n", cacheVersionString, currentVersion)
    760 		return false
    761 	}
    762 
    763 	cacheParamBytes, err := f.readLine(cacheReader)
    764 	if err != nil {
    765 		f.verbosef("Failed to read database search params; database is invalid\n")
    766 		return false
    767 	}
    768 
    769 	if len(cacheParamBytes) > 0 && cacheParamBytes[len(cacheParamBytes)-1] == lineSeparator {
    770 		cacheParamBytes = cacheParamBytes[:len(cacheParamBytes)-1]
    771 	}
    772 
    773 	currentParamBytes, err := f.cacheMetadata.Config.Dump()
    774 	if err != nil {
    775 		panic("Finder failed to serialize its parameters")
    776 	}
    777 	cacheParamString := string(cacheParamBytes)
    778 	currentParamString := string(currentParamBytes)
    779 	if cacheParamString != currentParamString {
    780 		f.verbosef("Params changed from %q to %q, database is not applicable\n", cacheParamString, currentParamString)
    781 		return false
    782 	}
    783 	return true
    784 }
    785 
    786 // loadBytes compares the cache info in <data> to the state of the filesystem
    787 // loadBytes returns a map representing <data> and also a slice of dirs that need to be re-walked
    788 func (f *Finder) loadBytes(id int, data []byte) (m *pathMap, dirsToWalk []string, err error) {
    789 
    790 	helperStartTime := time.Now()
    791 
    792 	cachedNodes, err := f.parseCacheEntry(data)
    793 	if err != nil {
    794 		return nil, nil, fmt.Errorf("Failed to parse block %v: %v\n", id, err.Error())
    795 	}
    796 
    797 	unmarshalDate := time.Now()
    798 	f.verbosef("Unmarshaled %v objects for %v in %v\n",
    799 		len(cachedNodes), id, unmarshalDate.Sub(helperStartTime))
    800 
    801 	tempMap := newPathMap("/")
    802 	stats := make([]statResponse, len(cachedNodes))
    803 
    804 	for i, node := range cachedNodes {
    805 		// check the file system for an updated timestamp
    806 		stats[i] = f.statDirSync(node.Path)
    807 	}
    808 
    809 	dirsToWalk = []string{}
    810 	for i, cachedNode := range cachedNodes {
    811 		updated := stats[i]
    812 		// save the cached value
    813 		container := tempMap.GetNode(cachedNode.Path, true)
    814 		container.mapNode = mapNode{statResponse: updated}
    815 
    816 		// if the metadata changed and the directory still exists, then
    817 		// make a note to walk it later
    818 		if !f.isInfoUpToDate(cachedNode.statResponse, updated) && updated.ModTime != 0 {
    819 			f.setModified()
    820 			// make a note that the directory needs to be walked
    821 			dirsToWalk = append(dirsToWalk, cachedNode.Path)
    822 		} else {
    823 			container.mapNode.FileNames = cachedNode.FileNames
    824 		}
    825 	}
    826 	// count the number of nodes to improve our understanding of the shape of the tree,
    827 	// thereby improving parallelism of subsequent searches
    828 	tempMap.UpdateNumDescendentsRecursive()
    829 
    830 	f.verbosef("Statted inodes of block %v in %v\n", id, time.Now().Sub(unmarshalDate))
    831 	return tempMap, dirsToWalk, nil
    832 }
    833 
    834 // startFromExternalCache loads the cache database from disk
    835 // startFromExternalCache waits to return until the load of the cache db is complete, but
    836 // startFromExternalCache does not wait for all every listDir() or statDir() request to complete
    837 func (f *Finder) startFromExternalCache() (err error) {
    838 	startTime := time.Now()
    839 	dbPath := f.DbPath
    840 
    841 	// open cache file and validate its header
    842 	reader, err := f.filesystem.Open(dbPath)
    843 	if err != nil {
    844 		return errors.New("No data to load from database\n")
    845 	}
    846 	bufferedReader := bufio.NewReader(reader)
    847 	if !f.validateCacheHeader(bufferedReader) {
    848 		return errors.New("Cache header does not match")
    849 	}
    850 	f.verbosef("Database header matches, will attempt to use database %v\n", f.DbPath)
    851 
    852 	// read the file and spawn threads to process it
    853 	nodesToWalk := [][]*pathMap{}
    854 	mainTree := newPathMap("/")
    855 
    856 	// read the blocks and stream them into <blockChannel>
    857 	type dataBlock struct {
    858 		id   int
    859 		err  error
    860 		data []byte
    861 	}
    862 	blockChannel := make(chan dataBlock, f.numDbLoadingThreads)
    863 	readBlocks := func() {
    864 		index := 0
    865 		for {
    866 			// It takes some time to unmarshal the input from json, so we want
    867 			// to unmarshal it in parallel. In order to find valid places to
    868 			// break the input, we scan for the line separators that we inserted
    869 			// (for this purpose) when we dumped the database.
    870 			data, err := f.readLine(bufferedReader)
    871 			var response dataBlock
    872 			done := false
    873 			if err != nil && err != io.EOF {
    874 				response = dataBlock{id: index, err: err, data: nil}
    875 				done = true
    876 			} else {
    877 				done = (err == io.EOF)
    878 				response = dataBlock{id: index, err: nil, data: data}
    879 			}
    880 			blockChannel <- response
    881 			index++
    882 			duration := time.Since(startTime)
    883 			f.verbosef("Read block %v after %v\n", index, duration)
    884 			if done {
    885 				f.verbosef("Read %v blocks in %v\n", index, duration)
    886 				close(blockChannel)
    887 				return
    888 			}
    889 		}
    890 	}
    891 	go readBlocks()
    892 
    893 	// Read from <blockChannel> and stream the responses into <resultChannel>.
    894 	type workResponse struct {
    895 		id          int
    896 		err         error
    897 		tree        *pathMap
    898 		updatedDirs []string
    899 	}
    900 	resultChannel := make(chan workResponse)
    901 	processBlocks := func() {
    902 		numProcessed := 0
    903 		threadPool := newThreadPool(f.numDbLoadingThreads)
    904 		for {
    905 			// get a block to process
    906 			block, received := <-blockChannel
    907 			if !received {
    908 				break
    909 			}
    910 
    911 			if block.err != nil {
    912 				resultChannel <- workResponse{err: block.err}
    913 				break
    914 			}
    915 			numProcessed++
    916 			// wait until there is CPU available to process it
    917 			threadPool.Run(
    918 				func() {
    919 					processStartTime := time.Now()
    920 					f.verbosef("Starting to process block %v after %v\n",
    921 						block.id, processStartTime.Sub(startTime))
    922 					tempMap, updatedDirs, err := f.loadBytes(block.id, block.data)
    923 					var response workResponse
    924 					if err != nil {
    925 						f.verbosef(
    926 							"Block %v failed to parse with error %v\n",
    927 							block.id, err)
    928 						response = workResponse{err: err}
    929 					} else {
    930 						response = workResponse{
    931 							id:          block.id,
    932 							err:         nil,
    933 							tree:        tempMap,
    934 							updatedDirs: updatedDirs,
    935 						}
    936 					}
    937 					f.verbosef("Processed block %v in %v\n",
    938 						block.id, time.Since(processStartTime),
    939 					)
    940 					resultChannel <- response
    941 				},
    942 			)
    943 		}
    944 		threadPool.Wait()
    945 		f.verbosef("Finished processing %v blocks in %v\n",
    946 			numProcessed, time.Since(startTime))
    947 		close(resultChannel)
    948 	}
    949 	go processBlocks()
    950 
    951 	// Read from <resultChannel> and use the results
    952 	combineResults := func() (err error) {
    953 		for {
    954 			result, received := <-resultChannel
    955 			if !received {
    956 				break
    957 			}
    958 			if err != nil {
    959 				// In case of an error, wait for work to complete before
    960 				// returning the error. This ensures that any subsequent
    961 				// work doesn't need to compete for resources (and possibly
    962 				// fail due to, for example, a filesystem limit on the number of
    963 				// concurrently open files) with past work.
    964 				continue
    965 			}
    966 			if result.err != nil {
    967 				err = result.err
    968 				continue
    969 			}
    970 			// update main tree
    971 			mainTree.MergeIn(result.tree)
    972 			// record any new directories that we will need to Stat()
    973 			updatedNodes := make([]*pathMap, len(result.updatedDirs))
    974 			for j, dir := range result.updatedDirs {
    975 				node := mainTree.GetNode(dir, false)
    976 				updatedNodes[j] = node
    977 			}
    978 			nodesToWalk = append(nodesToWalk, updatedNodes)
    979 		}
    980 		return err
    981 	}
    982 	err = combineResults()
    983 	if err != nil {
    984 		return err
    985 	}
    986 
    987 	f.nodes = *mainTree
    988 
    989 	// after having loaded the entire db and therefore created entries for
    990 	// the directories we know of, now it's safe to start calling ReadDir on
    991 	// any updated directories
    992 	for i := range nodesToWalk {
    993 		f.listDirsAsync(nodesToWalk[i])
    994 	}
    995 	f.verbosef("Loaded db and statted known dirs in %v\n", time.Since(startTime))
    996 	f.threadPool.Wait()
    997 	f.verbosef("Loaded db and statted all dirs in %v\n", time.Now().Sub(startTime))
    998 
    999 	return err
   1000 }
   1001 
   1002 // startWithoutExternalCache starts scanning the filesystem according to the cache config
   1003 // startWithoutExternalCache should be called if startFromExternalCache is not applicable
   1004 func (f *Finder) startWithoutExternalCache() {
   1005 	startTime := time.Now()
   1006 	configDirs := f.cacheMetadata.Config.RootDirs
   1007 
   1008 	// clean paths
   1009 	candidates := make([]string, len(configDirs))
   1010 	for i, dir := range configDirs {
   1011 		candidates[i] = filepath.Clean(dir)
   1012 	}
   1013 	// remove duplicates
   1014 	dirsToScan := make([]string, 0, len(configDirs))
   1015 	for _, candidate := range candidates {
   1016 		include := true
   1017 		for _, included := range dirsToScan {
   1018 			if included == "/" || strings.HasPrefix(candidate+"/", included+"/") {
   1019 				include = false
   1020 				break
   1021 			}
   1022 		}
   1023 		if include {
   1024 			dirsToScan = append(dirsToScan, candidate)
   1025 		}
   1026 	}
   1027 
   1028 	// start searching finally
   1029 	for _, path := range dirsToScan {
   1030 		f.verbosef("Starting find of %v\n", path)
   1031 		f.startFind(path)
   1032 	}
   1033 
   1034 	f.threadPool.Wait()
   1035 
   1036 	f.verbosef("Scanned filesystem (not using cache) in %v\n", time.Now().Sub(startTime))
   1037 }
   1038 
   1039 // isInfoUpToDate tells whether <new> can confirm that results computed at <old> are still valid
   1040 func (f *Finder) isInfoUpToDate(old statResponse, new statResponse) (equal bool) {
   1041 	if old.Inode != new.Inode {
   1042 		return false
   1043 	}
   1044 	if old.ModTime != new.ModTime {
   1045 		return false
   1046 	}
   1047 	if old.Device != new.Device {
   1048 		return false
   1049 	}
   1050 	return true
   1051 }
   1052 
   1053 func (f *Finder) wasModified() bool {
   1054 	return atomic.LoadInt32(&f.modifiedFlag) > 0
   1055 }
   1056 
   1057 func (f *Finder) setModified() {
   1058 	var newVal int32
   1059 	newVal = 1
   1060 	atomic.StoreInt32(&f.modifiedFlag, newVal)
   1061 }
   1062 
   1063 // sortedDirEntries exports directory entries to facilitate dumping them to the external cache
   1064 func (f *Finder) sortedDirEntries() []dirFullInfo {
   1065 	startTime := time.Now()
   1066 	nodes := make([]dirFullInfo, 0)
   1067 	for _, node := range f.nodes.DumpAll() {
   1068 		if node.ModTime != 0 {
   1069 			nodes = append(nodes, node)
   1070 		}
   1071 	}
   1072 	discoveryDate := time.Now()
   1073 	f.verbosef("Generated %v cache entries in %v\n", len(nodes), discoveryDate.Sub(startTime))
   1074 	less := func(i int, j int) bool {
   1075 		return nodes[i].Path < nodes[j].Path
   1076 	}
   1077 	sort.Slice(nodes, less)
   1078 	sortDate := time.Now()
   1079 	f.verbosef("Sorted %v cache entries in %v\n", len(nodes), sortDate.Sub(discoveryDate))
   1080 
   1081 	return nodes
   1082 }
   1083 
   1084 // serializeDb converts the cache database into a form to save to disk
   1085 func (f *Finder) serializeDb() ([]byte, error) {
   1086 	// sort dir entries
   1087 	var entryList = f.sortedDirEntries()
   1088 
   1089 	// Generate an output file that can be conveniently loaded using the same number of threads
   1090 	// as were used in this execution (because presumably that will be the number of threads
   1091 	// used in the next execution too)
   1092 
   1093 	// generate header
   1094 	header := []byte{}
   1095 	header = append(header, []byte(f.cacheMetadata.Version)...)
   1096 	header = append(header, lineSeparator)
   1097 	configDump, err := f.cacheMetadata.Config.Dump()
   1098 	if err != nil {
   1099 		return nil, err
   1100 	}
   1101 	header = append(header, configDump...)
   1102 
   1103 	// serialize individual blocks in parallel
   1104 	numBlocks := f.numDbLoadingThreads
   1105 	if numBlocks > len(entryList) {
   1106 		numBlocks = len(entryList)
   1107 	}
   1108 	blocks := make([][]byte, 1+numBlocks)
   1109 	blocks[0] = header
   1110 	blockMin := 0
   1111 	wg := sync.WaitGroup{}
   1112 	var errLock sync.Mutex
   1113 
   1114 	for i := 1; i <= numBlocks; i++ {
   1115 		// identify next block
   1116 		blockMax := len(entryList) * i / numBlocks
   1117 		block := entryList[blockMin:blockMax]
   1118 
   1119 		// process block
   1120 		wg.Add(1)
   1121 		go func(index int, block []dirFullInfo) {
   1122 			byteBlock, subErr := f.serializeCacheEntry(block)
   1123 			f.verbosef("Serialized block %v into %v bytes\n", index, len(byteBlock))
   1124 			if subErr != nil {
   1125 				f.verbosef("%v\n", subErr.Error())
   1126 				errLock.Lock()
   1127 				err = subErr
   1128 				errLock.Unlock()
   1129 			} else {
   1130 				blocks[index] = byteBlock
   1131 			}
   1132 			wg.Done()
   1133 		}(i, block)
   1134 
   1135 		blockMin = blockMax
   1136 	}
   1137 
   1138 	wg.Wait()
   1139 
   1140 	if err != nil {
   1141 		return nil, err
   1142 	}
   1143 
   1144 	content := bytes.Join(blocks, []byte{lineSeparator})
   1145 
   1146 	return content, nil
   1147 }
   1148 
   1149 // dumpDb saves the cache database to disk
   1150 func (f *Finder) dumpDb() error {
   1151 	startTime := time.Now()
   1152 	f.verbosef("Dumping db\n")
   1153 
   1154 	tempPath := f.DbPath + ".tmp"
   1155 
   1156 	bytes, err := f.serializeDb()
   1157 	if err != nil {
   1158 		return err
   1159 	}
   1160 	serializeDate := time.Now()
   1161 	f.verbosef("Serialized db in %v\n", serializeDate.Sub(startTime))
   1162 	// dump file and atomically move
   1163 	err = f.filesystem.WriteFile(tempPath, bytes, 0777)
   1164 	if err != nil {
   1165 		return err
   1166 	}
   1167 	err = f.filesystem.Rename(tempPath, f.DbPath)
   1168 	if err != nil {
   1169 		return err
   1170 	}
   1171 
   1172 	f.verbosef("Wrote db in %v\n", time.Now().Sub(serializeDate))
   1173 	return nil
   1174 
   1175 }
   1176 
   1177 // canIgnoreFsErr checks for certain classes of filesystem errors that are safe to ignore
   1178 func (f *Finder) canIgnoreFsErr(err error) bool {
   1179 	pathErr, isPathErr := err.(*os.PathError)
   1180 	if !isPathErr {
   1181 		// Don't recognize this error
   1182 		return false
   1183 	}
   1184 	if os.IsPermission(pathErr) {
   1185 		// Permission errors are ignored:
   1186 		// https://issuetracker.google.com/37553659
   1187 		// https://github.com/google/kati/pull/116
   1188 		return true
   1189 	}
   1190 	if pathErr.Err == os.ErrNotExist {
   1191 		// If a directory doesn't exist, that generally means the cache is out-of-date
   1192 		return true
   1193 	}
   1194 	// Don't recognize this error
   1195 	return false
   1196 }
   1197 
   1198 // onFsError should be called whenever a potentially fatal error is returned from a filesystem call
   1199 func (f *Finder) onFsError(path string, err error) {
   1200 	if !f.canIgnoreFsErr(err) {
   1201 		// We could send the errors through a channel instead, although that would cause this call
   1202 		// to block unless we preallocated a sufficient buffer or spawned a reader thread.
   1203 		// Although it wouldn't be too complicated to spawn a reader thread, it's still slightly
   1204 		// more convenient to use a lock. Only in an unusual situation should this code be
   1205 		// invoked anyway.
   1206 		f.errlock.Lock()
   1207 		f.fsErrs = append(f.fsErrs, fsErr{path: path, err: err})
   1208 		f.errlock.Unlock()
   1209 	}
   1210 }
   1211 
   1212 // discardErrsForPrunedPaths removes any errors for paths that are no longer included in the cache
   1213 func (f *Finder) discardErrsForPrunedPaths() {
   1214 	// This function could be somewhat inefficient due to being single-threaded,
   1215 	// but the length of f.fsErrs should be approximately 0, so it shouldn't take long anyway.
   1216 	relevantErrs := make([]fsErr, 0, len(f.fsErrs))
   1217 	for _, fsErr := range f.fsErrs {
   1218 		path := fsErr.path
   1219 		node := f.nodes.GetNode(path, false)
   1220 		if node != nil {
   1221 			// The path in question wasn't pruned due to a failure to process a parent directory.
   1222 			// So, the failure to process this path is important
   1223 			relevantErrs = append(relevantErrs, fsErr)
   1224 		}
   1225 	}
   1226 	f.fsErrs = relevantErrs
   1227 }
   1228 
   1229 // getErr returns an error based on previous calls to onFsErr, if any
   1230 func (f *Finder) getErr() error {
   1231 	f.discardErrsForPrunedPaths()
   1232 
   1233 	numErrs := len(f.fsErrs)
   1234 	if numErrs < 1 {
   1235 		return nil
   1236 	}
   1237 
   1238 	maxNumErrsToInclude := 10
   1239 	message := ""
   1240 	if numErrs > maxNumErrsToInclude {
   1241 		message = fmt.Sprintf("finder encountered %v errors: %v...", numErrs, f.fsErrs[:maxNumErrsToInclude])
   1242 	} else {
   1243 		message = fmt.Sprintf("finder encountered %v errors: %v", numErrs, f.fsErrs)
   1244 	}
   1245 
   1246 	return errors.New(message)
   1247 }
   1248 
   1249 func (f *Finder) statDirAsync(dir *pathMap) {
   1250 	node := dir
   1251 	path := dir.path
   1252 	f.threadPool.Run(
   1253 		func() {
   1254 			updatedStats := f.statDirSync(path)
   1255 
   1256 			if !f.isInfoUpToDate(node.statResponse, updatedStats) {
   1257 				node.mapNode = mapNode{
   1258 					statResponse: updatedStats,
   1259 					FileNames:    []string{},
   1260 				}
   1261 				f.setModified()
   1262 				if node.statResponse.ModTime != 0 {
   1263 					// modification time was updated, so re-scan for
   1264 					// child directories
   1265 					f.listDirAsync(dir)
   1266 				}
   1267 			}
   1268 		},
   1269 	)
   1270 }
   1271 
   1272 func (f *Finder) statDirSync(path string) statResponse {
   1273 
   1274 	fileInfo, err := f.filesystem.Lstat(path)
   1275 
   1276 	var stats statResponse
   1277 	if err != nil {
   1278 		// possibly record this error
   1279 		f.onFsError(path, err)
   1280 		// in case of a failure to stat the directory, treat the directory as missing (modTime = 0)
   1281 		return stats
   1282 	}
   1283 	modTime := fileInfo.ModTime()
   1284 	stats = statResponse{}
   1285 	inode, err := f.filesystem.InodeNumber(fileInfo)
   1286 	if err != nil {
   1287 		panic(fmt.Sprintf("Could not get inode number of %v: %v\n", path, err.Error()))
   1288 	}
   1289 	stats.Inode = inode
   1290 	device, err := f.filesystem.DeviceNumber(fileInfo)
   1291 	if err != nil {
   1292 		panic(fmt.Sprintf("Could not get device number of %v: %v\n", path, err.Error()))
   1293 	}
   1294 	stats.Device = device
   1295 	permissionsChangeTime, err := f.filesystem.PermTime(fileInfo)
   1296 
   1297 	if err != nil {
   1298 		panic(fmt.Sprintf("Could not get permissions modification time (CTime) of %v: %v\n", path, err.Error()))
   1299 	}
   1300 	// We're only interested in knowing whether anything about the directory
   1301 	// has changed since last check, so we use the latest of the two
   1302 	// modification times (content modification (mtime) and
   1303 	// permission modification (ctime))
   1304 	if permissionsChangeTime.After(modTime) {
   1305 		modTime = permissionsChangeTime
   1306 	}
   1307 	stats.ModTime = modTime.UnixNano()
   1308 
   1309 	return stats
   1310 }
   1311 
   1312 // pruneCacheCandidates removes the items that we don't want to include in our persistent cache
   1313 func (f *Finder) pruneCacheCandidates(items *DirEntries) {
   1314 
   1315 	for _, fileName := range items.FileNames {
   1316 		for _, abortedName := range f.cacheMetadata.Config.PruneFiles {
   1317 			if fileName == abortedName {
   1318 				items.FileNames = []string{}
   1319 				items.DirNames = []string{}
   1320 				return
   1321 			}
   1322 		}
   1323 	}
   1324 
   1325 	// remove any files that aren't the ones we want to include
   1326 	writeIndex := 0
   1327 	for _, fileName := range items.FileNames {
   1328 		// include only these files
   1329 		for _, includedName := range f.cacheMetadata.Config.IncludeFiles {
   1330 			if fileName == includedName {
   1331 				items.FileNames[writeIndex] = fileName
   1332 				writeIndex++
   1333 				break
   1334 			}
   1335 		}
   1336 	}
   1337 	// resize
   1338 	items.FileNames = items.FileNames[:writeIndex]
   1339 
   1340 	writeIndex = 0
   1341 	for _, dirName := range items.DirNames {
   1342 		items.DirNames[writeIndex] = dirName
   1343 		// ignore other dirs that are known to not be inputs to the build process
   1344 		include := true
   1345 		for _, excludedName := range f.cacheMetadata.Config.ExcludeDirs {
   1346 			if dirName == excludedName {
   1347 				// don't include
   1348 				include = false
   1349 				break
   1350 			}
   1351 		}
   1352 		if include {
   1353 			writeIndex++
   1354 		}
   1355 	}
   1356 	// resize
   1357 	items.DirNames = items.DirNames[:writeIndex]
   1358 }
   1359 
   1360 func (f *Finder) listDirsAsync(nodes []*pathMap) {
   1361 	f.threadPool.Run(
   1362 		func() {
   1363 			for i := range nodes {
   1364 				f.listDirSync(nodes[i])
   1365 			}
   1366 		},
   1367 	)
   1368 }
   1369 
   1370 func (f *Finder) listDirAsync(node *pathMap) {
   1371 	f.threadPool.Run(
   1372 		func() {
   1373 			f.listDirSync(node)
   1374 		},
   1375 	)
   1376 }
   1377 
   1378 func (f *Finder) listDirSync(dir *pathMap) {
   1379 	path := dir.path
   1380 	children, err := f.filesystem.ReadDir(path)
   1381 
   1382 	if err != nil {
   1383 		// possibly record this error
   1384 		f.onFsError(path, err)
   1385 		// if listing the contents of the directory fails (presumably due to
   1386 		// permission denied), then treat the directory as empty
   1387 		children = nil
   1388 	}
   1389 
   1390 	var subdirs []string
   1391 	var subfiles []string
   1392 
   1393 	for _, child := range children {
   1394 		linkBits := child.Mode() & os.ModeSymlink
   1395 		isLink := linkBits != 0
   1396 		if child.IsDir() {
   1397 			if !isLink {
   1398 				// Skip symlink dirs.
   1399 				// We don't have to support symlink dirs because
   1400 				// that would cause duplicates.
   1401 				subdirs = append(subdirs, child.Name())
   1402 			}
   1403 		} else {
   1404 			// We do have to support symlink files because the link name might be
   1405 			// different than the target name
   1406 			// (for example, Android.bp -> build/soong/root.bp)
   1407 			subfiles = append(subfiles, child.Name())
   1408 		}
   1409 
   1410 	}
   1411 	parentNode := dir
   1412 
   1413 	entry := &DirEntries{Path: path, DirNames: subdirs, FileNames: subfiles}
   1414 	f.pruneCacheCandidates(entry)
   1415 
   1416 	// create a pathMap node for each relevant subdirectory
   1417 	relevantChildren := map[string]*pathMap{}
   1418 	for _, subdirName := range entry.DirNames {
   1419 		childNode, found := parentNode.children[subdirName]
   1420 		// if we already knew of this directory, then we already have a request pending to Stat it
   1421 		// if we didn't already know of this directory, then we must Stat it now
   1422 		if !found {
   1423 			childNode = parentNode.newChild(subdirName)
   1424 			f.statDirAsync(childNode)
   1425 		}
   1426 		relevantChildren[subdirName] = childNode
   1427 	}
   1428 	// Note that in rare cases, it's possible that we're reducing the set of
   1429 	// children via this statement, if these are all true:
   1430 	// 1. we previously had a cache that knew about subdirectories of parentNode
   1431 	// 2. the user created a prune-file (described in pruneCacheCandidates)
   1432 	//    inside <parentNode>, which specifies that the contents of parentNode
   1433 	//    are to be ignored.
   1434 	// The fact that it's possible to remove children here means that *pathMap structs
   1435 	// must not be looked up from f.nodes by filepath (and instead must be accessed by
   1436 	// direct pointer) until after every listDirSync completes
   1437 	parentNode.FileNames = entry.FileNames
   1438 	parentNode.children = relevantChildren
   1439 
   1440 }
   1441 
   1442 // listMatches takes a node and a function that specifies which subdirectories and
   1443 // files to include, and listMatches returns the matches
   1444 func (f *Finder) listMatches(node *pathMap,
   1445 	filter WalkFunc) (subDirs []*pathMap, filePaths []string) {
   1446 	entries := DirEntries{
   1447 		FileNames: node.FileNames,
   1448 	}
   1449 	entries.DirNames = make([]string, 0, len(node.children))
   1450 	for childName := range node.children {
   1451 		entries.DirNames = append(entries.DirNames, childName)
   1452 	}
   1453 
   1454 	dirNames, fileNames := filter(entries)
   1455 
   1456 	subDirs = []*pathMap{}
   1457 	filePaths = make([]string, 0, len(fileNames))
   1458 	for _, fileName := range fileNames {
   1459 		filePaths = append(filePaths, joinCleanPaths(node.path, fileName))
   1460 	}
   1461 	subDirs = make([]*pathMap, 0, len(dirNames))
   1462 	for _, childName := range dirNames {
   1463 		child, ok := node.children[childName]
   1464 		if ok {
   1465 			subDirs = append(subDirs, child)
   1466 		}
   1467 	}
   1468 
   1469 	return subDirs, filePaths
   1470 }
   1471 
   1472 // findInCacheMultithreaded spawns potentially multiple goroutines with which to search the cache.
   1473 func (f *Finder) findInCacheMultithreaded(node *pathMap, filter WalkFunc,
   1474 	approxNumThreads int) []string {
   1475 
   1476 	if approxNumThreads < 2 {
   1477 		// Done spawning threads; process remaining directories
   1478 		return f.findInCacheSinglethreaded(node, filter)
   1479 	}
   1480 
   1481 	totalWork := 0
   1482 	for _, child := range node.children {
   1483 		totalWork += child.approximateNumDescendents
   1484 	}
   1485 	childrenResults := make(chan []string, len(node.children))
   1486 
   1487 	subDirs, filePaths := f.listMatches(node, filter)
   1488 
   1489 	// process child directories
   1490 	for _, child := range subDirs {
   1491 		numChildThreads := approxNumThreads * child.approximateNumDescendents / totalWork
   1492 		childProcessor := func(child *pathMap) {
   1493 			childResults := f.findInCacheMultithreaded(child, filter, numChildThreads)
   1494 			childrenResults <- childResults
   1495 		}
   1496 		// If we're allowed to use more than 1 thread to process this directory,
   1497 		// then instead we use 1 thread for each subdirectory.
   1498 		// It would be strange to spawn threads for only some subdirectories.
   1499 		go childProcessor(child)
   1500 	}
   1501 
   1502 	// collect results
   1503 	for i := 0; i < len(subDirs); i++ {
   1504 		childResults := <-childrenResults
   1505 		filePaths = append(filePaths, childResults...)
   1506 	}
   1507 	close(childrenResults)
   1508 
   1509 	return filePaths
   1510 }
   1511 
   1512 // findInCacheSinglethreaded synchronously searches the cache for all matching file paths
   1513 // note findInCacheSinglethreaded runs 2X to 4X as fast by being iterative rather than recursive
   1514 func (f *Finder) findInCacheSinglethreaded(node *pathMap, filter WalkFunc) []string {
   1515 	if node == nil {
   1516 		return []string{}
   1517 	}
   1518 
   1519 	nodes := []*pathMap{node}
   1520 	matches := []string{}
   1521 
   1522 	for len(nodes) > 0 {
   1523 		currentNode := nodes[0]
   1524 		nodes = nodes[1:]
   1525 
   1526 		subDirs, filePaths := f.listMatches(currentNode, filter)
   1527 
   1528 		nodes = append(nodes, subDirs...)
   1529 
   1530 		matches = append(matches, filePaths...)
   1531 	}
   1532 	return matches
   1533 }
   1534