1 //===--- FileManager.cpp - File System Probing and Caching ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the FileManager interface. 11 // 12 //===----------------------------------------------------------------------===// 13 // 14 // TODO: This should index all interesting directories with dirent calls. 15 // getdirentries ? 16 // opendir/readdir_r/closedir ? 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "clang/Basic/FileManager.h" 21 #include "clang/Basic/FileSystemStatCache.h" 22 #include "llvm/ADT/SmallString.h" 23 #include "llvm/Config/llvm-config.h" 24 #include "llvm/Support/FileSystem.h" 25 #include "llvm/Support/MemoryBuffer.h" 26 #include "llvm/Support/Path.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <map> 29 #include <set> 30 #include <string> 31 #include <system_error> 32 33 using namespace clang; 34 35 /// NON_EXISTENT_DIR - A special value distinct from null that is used to 36 /// represent a dir name that doesn't exist on the disk. 37 #define NON_EXISTENT_DIR reinterpret_cast<DirectoryEntry*>((intptr_t)-1) 38 39 /// NON_EXISTENT_FILE - A special value distinct from null that is used to 40 /// represent a filename that doesn't exist on the disk. 41 #define NON_EXISTENT_FILE reinterpret_cast<FileEntry*>((intptr_t)-1) 42 43 //===----------------------------------------------------------------------===// 44 // Common logic. 45 //===----------------------------------------------------------------------===// 46 47 FileManager::FileManager(const FileSystemOptions &FSO, 48 IntrusiveRefCntPtr<vfs::FileSystem> FS) 49 : FS(FS), FileSystemOpts(FSO), 50 SeenDirEntries(64), SeenFileEntries(64), NextFileUID(0) { 51 NumDirLookups = NumFileLookups = 0; 52 NumDirCacheMisses = NumFileCacheMisses = 0; 53 54 // If the caller doesn't provide a virtual file system, just grab the real 55 // file system. 56 if (!FS) 57 this->FS = vfs::getRealFileSystem(); 58 } 59 60 FileManager::~FileManager() { 61 for (unsigned i = 0, e = VirtualFileEntries.size(); i != e; ++i) 62 delete VirtualFileEntries[i]; 63 for (unsigned i = 0, e = VirtualDirectoryEntries.size(); i != e; ++i) 64 delete VirtualDirectoryEntries[i]; 65 } 66 67 void FileManager::addStatCache(std::unique_ptr<FileSystemStatCache> statCache, 68 bool AtBeginning) { 69 assert(statCache && "No stat cache provided?"); 70 if (AtBeginning || !StatCache.get()) { 71 statCache->setNextStatCache(std::move(StatCache)); 72 StatCache = std::move(statCache); 73 return; 74 } 75 76 FileSystemStatCache *LastCache = StatCache.get(); 77 while (LastCache->getNextStatCache()) 78 LastCache = LastCache->getNextStatCache(); 79 80 LastCache->setNextStatCache(std::move(statCache)); 81 } 82 83 void FileManager::removeStatCache(FileSystemStatCache *statCache) { 84 if (!statCache) 85 return; 86 87 if (StatCache.get() == statCache) { 88 // This is the first stat cache. 89 StatCache = StatCache->takeNextStatCache(); 90 return; 91 } 92 93 // Find the stat cache in the list. 94 FileSystemStatCache *PrevCache = StatCache.get(); 95 while (PrevCache && PrevCache->getNextStatCache() != statCache) 96 PrevCache = PrevCache->getNextStatCache(); 97 98 assert(PrevCache && "Stat cache not found for removal"); 99 PrevCache->setNextStatCache(statCache->takeNextStatCache()); 100 } 101 102 void FileManager::clearStatCaches() { 103 StatCache.reset(); 104 } 105 106 /// \brief Retrieve the directory that the given file name resides in. 107 /// Filename can point to either a real file or a virtual file. 108 static const DirectoryEntry *getDirectoryFromFile(FileManager &FileMgr, 109 StringRef Filename, 110 bool CacheFailure) { 111 if (Filename.empty()) 112 return nullptr; 113 114 if (llvm::sys::path::is_separator(Filename[Filename.size() - 1])) 115 return nullptr; // If Filename is a directory. 116 117 StringRef DirName = llvm::sys::path::parent_path(Filename); 118 // Use the current directory if file has no path component. 119 if (DirName.empty()) 120 DirName = "."; 121 122 return FileMgr.getDirectory(DirName, CacheFailure); 123 } 124 125 /// Add all ancestors of the given path (pointing to either a file or 126 /// a directory) as virtual directories. 127 void FileManager::addAncestorsAsVirtualDirs(StringRef Path) { 128 StringRef DirName = llvm::sys::path::parent_path(Path); 129 if (DirName.empty()) 130 return; 131 132 auto &NamedDirEnt = 133 *SeenDirEntries.insert(std::make_pair(DirName, nullptr)).first; 134 135 // When caching a virtual directory, we always cache its ancestors 136 // at the same time. Therefore, if DirName is already in the cache, 137 // we don't need to recurse as its ancestors must also already be in 138 // the cache. 139 if (NamedDirEnt.second) 140 return; 141 142 // Add the virtual directory to the cache. 143 DirectoryEntry *UDE = new DirectoryEntry; 144 UDE->Name = NamedDirEnt.first().data(); 145 NamedDirEnt.second = UDE; 146 VirtualDirectoryEntries.push_back(UDE); 147 148 // Recursively add the other ancestors. 149 addAncestorsAsVirtualDirs(DirName); 150 } 151 152 const DirectoryEntry *FileManager::getDirectory(StringRef DirName, 153 bool CacheFailure) { 154 // stat doesn't like trailing separators except for root directory. 155 // At least, on Win32 MSVCRT, stat() cannot strip trailing '/'. 156 // (though it can strip '\\') 157 if (DirName.size() > 1 && 158 DirName != llvm::sys::path::root_path(DirName) && 159 llvm::sys::path::is_separator(DirName.back())) 160 DirName = DirName.substr(0, DirName.size()-1); 161 #ifdef LLVM_ON_WIN32 162 // Fixing a problem with "clang C:test.c" on Windows. 163 // Stat("C:") does not recognize "C:" as a valid directory 164 std::string DirNameStr; 165 if (DirName.size() > 1 && DirName.back() == ':' && 166 DirName.equals_lower(llvm::sys::path::root_name(DirName))) { 167 DirNameStr = DirName.str() + '.'; 168 DirName = DirNameStr; 169 } 170 #endif 171 172 ++NumDirLookups; 173 auto &NamedDirEnt = 174 *SeenDirEntries.insert(std::make_pair(DirName, nullptr)).first; 175 176 // See if there was already an entry in the map. Note that the map 177 // contains both virtual and real directories. 178 if (NamedDirEnt.second) 179 return NamedDirEnt.second == NON_EXISTENT_DIR ? nullptr 180 : NamedDirEnt.second; 181 182 ++NumDirCacheMisses; 183 184 // By default, initialize it to invalid. 185 NamedDirEnt.second = NON_EXISTENT_DIR; 186 187 // Get the null-terminated directory name as stored as the key of the 188 // SeenDirEntries map. 189 const char *InterndDirName = NamedDirEnt.first().data(); 190 191 // Check to see if the directory exists. 192 FileData Data; 193 if (getStatValue(InterndDirName, Data, false, nullptr /*directory lookup*/)) { 194 // There's no real directory at the given path. 195 if (!CacheFailure) 196 SeenDirEntries.erase(DirName); 197 return nullptr; 198 } 199 200 // It exists. See if we have already opened a directory with the 201 // same inode (this occurs on Unix-like systems when one dir is 202 // symlinked to another, for example) or the same path (on 203 // Windows). 204 DirectoryEntry &UDE = UniqueRealDirs[Data.UniqueID]; 205 206 NamedDirEnt.second = &UDE; 207 if (!UDE.getName()) { 208 // We don't have this directory yet, add it. We use the string 209 // key from the SeenDirEntries map as the string. 210 UDE.Name = InterndDirName; 211 } 212 213 return &UDE; 214 } 215 216 const FileEntry *FileManager::getFile(StringRef Filename, bool openFile, 217 bool CacheFailure) { 218 ++NumFileLookups; 219 220 // See if there is already an entry in the map. 221 auto &NamedFileEnt = 222 *SeenFileEntries.insert(std::make_pair(Filename, nullptr)).first; 223 224 // See if there is already an entry in the map. 225 if (NamedFileEnt.second) 226 return NamedFileEnt.second == NON_EXISTENT_FILE ? nullptr 227 : NamedFileEnt.second; 228 229 ++NumFileCacheMisses; 230 231 // By default, initialize it to invalid. 232 NamedFileEnt.second = NON_EXISTENT_FILE; 233 234 // Get the null-terminated file name as stored as the key of the 235 // SeenFileEntries map. 236 const char *InterndFileName = NamedFileEnt.first().data(); 237 238 // Look up the directory for the file. When looking up something like 239 // sys/foo.h we'll discover all of the search directories that have a 'sys' 240 // subdirectory. This will let us avoid having to waste time on known-to-fail 241 // searches when we go to find sys/bar.h, because all the search directories 242 // without a 'sys' subdir will get a cached failure result. 243 const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename, 244 CacheFailure); 245 if (DirInfo == nullptr) { // Directory doesn't exist, file can't exist. 246 if (!CacheFailure) 247 SeenFileEntries.erase(Filename); 248 249 return nullptr; 250 } 251 252 // FIXME: Use the directory info to prune this, before doing the stat syscall. 253 // FIXME: This will reduce the # syscalls. 254 255 // Nope, there isn't. Check to see if the file exists. 256 std::unique_ptr<vfs::File> F; 257 FileData Data; 258 if (getStatValue(InterndFileName, Data, true, openFile ? &F : nullptr)) { 259 // There's no real file at the given path. 260 if (!CacheFailure) 261 SeenFileEntries.erase(Filename); 262 263 return nullptr; 264 } 265 266 assert((openFile || !F) && "undesired open file"); 267 268 // It exists. See if we have already opened a file with the same inode. 269 // This occurs when one dir is symlinked to another, for example. 270 FileEntry &UFE = UniqueRealFiles[Data.UniqueID]; 271 272 NamedFileEnt.second = &UFE; 273 274 // If the name returned by getStatValue is different than Filename, re-intern 275 // the name. 276 if (Data.Name != Filename) { 277 auto &NamedFileEnt = 278 *SeenFileEntries.insert(std::make_pair(Data.Name, nullptr)).first; 279 if (!NamedFileEnt.second) 280 NamedFileEnt.second = &UFE; 281 else 282 assert(NamedFileEnt.second == &UFE && 283 "filename from getStatValue() refers to wrong file"); 284 InterndFileName = NamedFileEnt.first().data(); 285 } 286 287 if (UFE.isValid()) { // Already have an entry with this inode, return it. 288 289 // FIXME: this hack ensures that if we look up a file by a virtual path in 290 // the VFS that the getDir() will have the virtual path, even if we found 291 // the file by a 'real' path first. This is required in order to find a 292 // module's structure when its headers/module map are mapped in the VFS. 293 // We should remove this as soon as we can properly support a file having 294 // multiple names. 295 if (DirInfo != UFE.Dir && Data.IsVFSMapped) 296 UFE.Dir = DirInfo; 297 298 // Always update the name to use the last name by which a file was accessed. 299 // FIXME: Neither this nor always using the first name is correct; we want 300 // to switch towards a design where we return a FileName object that 301 // encapsulates both the name by which the file was accessed and the 302 // corresponding FileEntry. 303 UFE.Name = InterndFileName; 304 305 return &UFE; 306 } 307 308 // Otherwise, we don't have this file yet, add it. 309 UFE.Name = InterndFileName; 310 UFE.Size = Data.Size; 311 UFE.ModTime = Data.ModTime; 312 UFE.Dir = DirInfo; 313 UFE.UID = NextFileUID++; 314 UFE.UniqueID = Data.UniqueID; 315 UFE.IsNamedPipe = Data.IsNamedPipe; 316 UFE.InPCH = Data.InPCH; 317 UFE.File = std::move(F); 318 UFE.IsValid = true; 319 return &UFE; 320 } 321 322 const FileEntry * 323 FileManager::getVirtualFile(StringRef Filename, off_t Size, 324 time_t ModificationTime) { 325 ++NumFileLookups; 326 327 // See if there is already an entry in the map. 328 auto &NamedFileEnt = 329 *SeenFileEntries.insert(std::make_pair(Filename, nullptr)).first; 330 331 // See if there is already an entry in the map. 332 if (NamedFileEnt.second && NamedFileEnt.second != NON_EXISTENT_FILE) 333 return NamedFileEnt.second; 334 335 ++NumFileCacheMisses; 336 337 // By default, initialize it to invalid. 338 NamedFileEnt.second = NON_EXISTENT_FILE; 339 340 addAncestorsAsVirtualDirs(Filename); 341 FileEntry *UFE = nullptr; 342 343 // Now that all ancestors of Filename are in the cache, the 344 // following call is guaranteed to find the DirectoryEntry from the 345 // cache. 346 const DirectoryEntry *DirInfo = getDirectoryFromFile(*this, Filename, 347 /*CacheFailure=*/true); 348 assert(DirInfo && 349 "The directory of a virtual file should already be in the cache."); 350 351 // Check to see if the file exists. If so, drop the virtual file 352 FileData Data; 353 const char *InterndFileName = NamedFileEnt.first().data(); 354 if (getStatValue(InterndFileName, Data, true, nullptr) == 0) { 355 Data.Size = Size; 356 Data.ModTime = ModificationTime; 357 UFE = &UniqueRealFiles[Data.UniqueID]; 358 359 NamedFileEnt.second = UFE; 360 361 // If we had already opened this file, close it now so we don't 362 // leak the descriptor. We're not going to use the file 363 // descriptor anyway, since this is a virtual file. 364 if (UFE->File) 365 UFE->closeFile(); 366 367 // If we already have an entry with this inode, return it. 368 if (UFE->isValid()) 369 return UFE; 370 371 UFE->UniqueID = Data.UniqueID; 372 UFE->IsNamedPipe = Data.IsNamedPipe; 373 UFE->InPCH = Data.InPCH; 374 } 375 376 if (!UFE) { 377 UFE = new FileEntry(); 378 VirtualFileEntries.push_back(UFE); 379 NamedFileEnt.second = UFE; 380 } 381 382 UFE->Name = InterndFileName; 383 UFE->Size = Size; 384 UFE->ModTime = ModificationTime; 385 UFE->Dir = DirInfo; 386 UFE->UID = NextFileUID++; 387 UFE->File.reset(); 388 return UFE; 389 } 390 391 void FileManager::FixupRelativePath(SmallVectorImpl<char> &path) const { 392 StringRef pathRef(path.data(), path.size()); 393 394 if (FileSystemOpts.WorkingDir.empty() 395 || llvm::sys::path::is_absolute(pathRef)) 396 return; 397 398 SmallString<128> NewPath(FileSystemOpts.WorkingDir); 399 llvm::sys::path::append(NewPath, pathRef); 400 path = NewPath; 401 } 402 403 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 404 FileManager::getBufferForFile(const FileEntry *Entry, bool isVolatile, 405 bool ShouldCloseOpenFile) { 406 uint64_t FileSize = Entry->getSize(); 407 // If there's a high enough chance that the file have changed since we 408 // got its size, force a stat before opening it. 409 if (isVolatile) 410 FileSize = -1; 411 412 const char *Filename = Entry->getName(); 413 // If the file is already open, use the open file descriptor. 414 if (Entry->File) { 415 auto Result = 416 Entry->File->getBuffer(Filename, FileSize, 417 /*RequiresNullTerminator=*/true, isVolatile); 418 // FIXME: we need a set of APIs that can make guarantees about whether a 419 // FileEntry is open or not. 420 if (ShouldCloseOpenFile) 421 Entry->closeFile(); 422 return Result; 423 } 424 425 // Otherwise, open the file. 426 427 if (FileSystemOpts.WorkingDir.empty()) 428 return FS->getBufferForFile(Filename, FileSize, 429 /*RequiresNullTerminator=*/true, isVolatile); 430 431 SmallString<128> FilePath(Entry->getName()); 432 FixupRelativePath(FilePath); 433 return FS->getBufferForFile(FilePath, FileSize, 434 /*RequiresNullTerminator=*/true, isVolatile); 435 } 436 437 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> 438 FileManager::getBufferForFile(StringRef Filename) { 439 if (FileSystemOpts.WorkingDir.empty()) 440 return FS->getBufferForFile(Filename); 441 442 SmallString<128> FilePath(Filename); 443 FixupRelativePath(FilePath); 444 return FS->getBufferForFile(FilePath.c_str()); 445 } 446 447 /// getStatValue - Get the 'stat' information for the specified path, 448 /// using the cache to accelerate it if possible. This returns true 449 /// if the path points to a virtual file or does not exist, or returns 450 /// false if it's an existent real file. If FileDescriptor is NULL, 451 /// do directory look-up instead of file look-up. 452 bool FileManager::getStatValue(const char *Path, FileData &Data, bool isFile, 453 std::unique_ptr<vfs::File> *F) { 454 // FIXME: FileSystemOpts shouldn't be passed in here, all paths should be 455 // absolute! 456 if (FileSystemOpts.WorkingDir.empty()) 457 return FileSystemStatCache::get(Path, Data, isFile, F,StatCache.get(), *FS); 458 459 SmallString<128> FilePath(Path); 460 FixupRelativePath(FilePath); 461 462 return FileSystemStatCache::get(FilePath.c_str(), Data, isFile, F, 463 StatCache.get(), *FS); 464 } 465 466 bool FileManager::getNoncachedStatValue(StringRef Path, 467 vfs::Status &Result) { 468 SmallString<128> FilePath(Path); 469 FixupRelativePath(FilePath); 470 471 llvm::ErrorOr<vfs::Status> S = FS->status(FilePath.c_str()); 472 if (!S) 473 return true; 474 Result = *S; 475 return false; 476 } 477 478 void FileManager::invalidateCache(const FileEntry *Entry) { 479 assert(Entry && "Cannot invalidate a NULL FileEntry"); 480 481 SeenFileEntries.erase(Entry->getName()); 482 483 // FileEntry invalidation should not block future optimizations in the file 484 // caches. Possible alternatives are cache truncation (invalidate last N) or 485 // invalidation of the whole cache. 486 UniqueRealFiles.erase(Entry->getUniqueID()); 487 } 488 489 490 void FileManager::GetUniqueIDMapping( 491 SmallVectorImpl<const FileEntry *> &UIDToFiles) const { 492 UIDToFiles.clear(); 493 UIDToFiles.resize(NextFileUID); 494 495 // Map file entries 496 for (llvm::StringMap<FileEntry*, llvm::BumpPtrAllocator>::const_iterator 497 FE = SeenFileEntries.begin(), FEEnd = SeenFileEntries.end(); 498 FE != FEEnd; ++FE) 499 if (FE->getValue() && FE->getValue() != NON_EXISTENT_FILE) 500 UIDToFiles[FE->getValue()->getUID()] = FE->getValue(); 501 502 // Map virtual file entries 503 for (SmallVectorImpl<FileEntry *>::const_iterator 504 VFE = VirtualFileEntries.begin(), VFEEnd = VirtualFileEntries.end(); 505 VFE != VFEEnd; ++VFE) 506 if (*VFE && *VFE != NON_EXISTENT_FILE) 507 UIDToFiles[(*VFE)->getUID()] = *VFE; 508 } 509 510 void FileManager::modifyFileEntry(FileEntry *File, 511 off_t Size, time_t ModificationTime) { 512 File->Size = Size; 513 File->ModTime = ModificationTime; 514 } 515 516 /// Remove '.' path components from the given absolute path. 517 /// \return \c true if any changes were made. 518 // FIXME: Move this to llvm::sys::path. 519 bool FileManager::removeDotPaths(SmallVectorImpl<char> &Path) { 520 using namespace llvm::sys; 521 522 SmallVector<StringRef, 16> ComponentStack; 523 StringRef P(Path.data(), Path.size()); 524 525 // Skip the root path, then look for traversal in the components. 526 StringRef Rel = path::relative_path(P); 527 bool AnyDots = false; 528 for (StringRef C : llvm::make_range(path::begin(Rel), path::end(Rel))) { 529 if (C == ".") { 530 AnyDots = true; 531 continue; 532 } 533 ComponentStack.push_back(C); 534 } 535 536 if (!AnyDots) 537 return false; 538 539 SmallString<256> Buffer = path::root_path(P); 540 for (StringRef C : ComponentStack) 541 path::append(Buffer, C); 542 543 Path.swap(Buffer); 544 return true; 545 } 546 547 StringRef FileManager::getCanonicalName(const DirectoryEntry *Dir) { 548 // FIXME: use llvm::sys::fs::canonical() when it gets implemented 549 llvm::DenseMap<const DirectoryEntry *, llvm::StringRef>::iterator Known 550 = CanonicalDirNames.find(Dir); 551 if (Known != CanonicalDirNames.end()) 552 return Known->second; 553 554 StringRef CanonicalName(Dir->getName()); 555 556 #ifdef LLVM_ON_UNIX 557 char CanonicalNameBuf[PATH_MAX]; 558 if (realpath(Dir->getName(), CanonicalNameBuf)) { 559 unsigned Len = strlen(CanonicalNameBuf); 560 char *Mem = static_cast<char *>(CanonicalNameStorage.Allocate(Len, 1)); 561 memcpy(Mem, CanonicalNameBuf, Len); 562 CanonicalName = StringRef(Mem, Len); 563 } 564 #else 565 SmallString<256> CanonicalNameBuf(CanonicalName); 566 llvm::sys::fs::make_absolute(CanonicalNameBuf); 567 llvm::sys::path::native(CanonicalNameBuf); 568 removeDotPaths(CanonicalNameBuf); 569 #endif 570 571 CanonicalDirNames.insert(std::make_pair(Dir, CanonicalName)); 572 return CanonicalName; 573 } 574 575 void FileManager::PrintStats() const { 576 llvm::errs() << "\n*** File Manager Stats:\n"; 577 llvm::errs() << UniqueRealFiles.size() << " real files found, " 578 << UniqueRealDirs.size() << " real dirs found.\n"; 579 llvm::errs() << VirtualFileEntries.size() << " virtual files found, " 580 << VirtualDirectoryEntries.size() << " virtual dirs found.\n"; 581 llvm::errs() << NumDirLookups << " dir lookups, " 582 << NumDirCacheMisses << " dir cache misses.\n"; 583 llvm::errs() << NumFileLookups << " file lookups, " 584 << NumFileCacheMisses << " file cache misses.\n"; 585 586 //llvm::errs() << PagesMapped << BytesOfPagesMapped << FSLookups; 587 } 588