1 //===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass statically checks for common and easily-identified constructs 11 // which produce undefined or likely unintended behavior in LLVM IR. 12 // 13 // It is not a guarantee of correctness, in two ways. First, it isn't 14 // comprehensive. There are checks which could be done statically which are 15 // not yet implemented. Some of these are indicated by TODO comments, but 16 // those aren't comprehensive either. Second, many conditions cannot be 17 // checked statically. This pass does no dynamic instrumentation, so it 18 // can't check for all possible problems. 19 // 20 // Another limitation is that it assumes all code will be executed. A store 21 // through a null pointer in a basic block which is never reached is harmless, 22 // but this pass will warn about it anyway. This is the main reason why most 23 // of these checks live here instead of in the Verifier pass. 24 // 25 // Optimization passes may make conditions that this pass checks for more or 26 // less obvious. If an optimization pass appears to be introducing a warning, 27 // it may be that the optimization pass is merely exposing an existing 28 // condition in the code. 29 // 30 // This code may be run before instcombine. In many cases, instcombine checks 31 // for the same kinds of things and turns instructions with undefined behavior 32 // into unreachable (or equivalent). Because of this, this pass makes some 33 // effort to look through bitcasts and so on. 
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/Lint.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
// Bit flags describing how a pointer is used by the memory reference being
// checked; passed as the Flags argument of visitMemoryReference.
namespace MemRef {
static const unsigned Read = 1;     // Pointer is loaded from.
static const unsigned Write = 2;    // Pointer is stored to.
static const unsigned Callee = 4;   // Pointer is the callee of a call.
static const unsigned Branchee = 8; // Pointer is an indirectbr target.
}

// The lint pass itself. It visits every instruction in a function via
// InstVisitor and accumulates diagnostics into a string, which
// runOnFunction prints to dbgs() afterwards. It never modifies the IR and
// never fails the pass pipeline (runOnFunction always returns false).
class Lint : public FunctionPass, public InstVisitor<Lint> {
  friend class InstVisitor<Lint>;

  void visitFunction(Function &F);

  void visitCallSite(CallSite CS);
  void visitMemoryReference(Instruction &I, Value *Ptr,
                            uint64_t Size, unsigned Align,
                            Type *Ty, unsigned Flags);
  void visitEHBeginCatch(IntrinsicInst *II);
  void visitEHEndCatch(IntrinsicInst *II);

  void visitCallInst(CallInst &I);
  void visitInvokeInst(InvokeInst &I);
  void visitReturnInst(ReturnInst &I);
  void visitLoadInst(LoadInst &I);
  void visitStoreInst(StoreInst &I);
  void visitXor(BinaryOperator &I);
  void visitSub(BinaryOperator &I);
  void visitLShr(BinaryOperator &I);
  void visitAShr(BinaryOperator &I);
  void visitShl(BinaryOperator &I);
  void visitSDiv(BinaryOperator &I);
  void visitUDiv(BinaryOperator &I);
  void visitSRem(BinaryOperator &I);
  void visitURem(BinaryOperator &I);
  void visitAllocaInst(AllocaInst &I);
  void visitVAArgInst(VAArgInst &I);
  void visitIndirectBrInst(IndirectBrInst &I);
  void visitExtractElementInst(ExtractElementInst &I);
  void visitInsertElementInst(InsertElementInst &I);
  void visitUnreachableInst(UnreachableInst &I);

  Value *findValue(Value *V, const DataLayout &DL, bool OffsetOk) const;
  Value *findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
                       SmallPtrSetImpl<Value *> &Visited) const;

public:
  // Analysis results cached per runOnFunction invocation.
  Module *Mod;
  AliasAnalysis *AA;
  AssumptionCache *AC;
  DominatorTree *DT;
  TargetLibraryInfo *TLI;

  // Diagnostics are streamed into Messages via MessagesStr and flushed in
  // runOnFunction.
  std::string Messages;
  raw_string_ostream MessagesStr;

  static char ID; // Pass identification, replacement for typeid
  Lint() : FunctionPass(ID), MessagesStr(Messages) {
    initializeLintPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    AU.addRequired<AliasAnalysis>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
  }
  void print(raw_ostream &O, const Module *M) const override {}

  // Print each non-null value in Vs to the message stream, one per line.
  // Instructions are printed whole; other values are printed as operands.
  void WriteValues(ArrayRef<const Value *> Vs) {
    for (const Value *V : Vs) {
      if (!V)
        continue;
      if (isa<Instruction>(V)) {
        MessagesStr << *V << '\n';
      } else {
        V->printAsOperand(MessagesStr, true, Mod);
        MessagesStr << '\n';
      }
    }
  }

  /// \brief A check failed, so print out the condition and the message.
  ///
  /// This provides a nice place to put a breakpoint if you want to see why
  /// something is not correct.
  void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }

  /// \brief A check failed (with values to print).
  ///
  /// This calls the Message-only version so that the above is easier to set
  /// a breakpoint on.
  template <typename T1, typename... Ts>
  void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) {
    CheckFailed(Message);
    WriteValues({V1, Vs...});
  }
};
}

char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
                      false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
                    false, true)

// Assert - We know that cond should be true, if not print an error message.
// Note: this RETURNS from the enclosing visitor on failure, so later checks
// in the same visitor are skipped once one fires.
#define Assert(C, ...) \
  do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0)

// Lint::run - This is the main Analysis entry point for a
// function.
//
bool Lint::runOnFunction(Function &F) {
  Mod = F.getParent();
  AA = &getAnalysis<AliasAnalysis>();
  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  visit(F);
  // Flush accumulated diagnostics and reset for the next function.
  dbgs() << MessagesStr.str();
  Messages.clear();
  return false;
}

void Lint::visitFunction(Function &F) {
  // This isn't undefined behavior, it's just a little unusual, and it's a
  // fairly common mistake to neglect to name a function.
  Assert(F.hasName() || F.hasLocalLinkage(),
         "Unusual: Unnamed function with non-local linkage", &F);

  // TODO: Check for irreducible control flow.
}

// Check a call or invoke: calling convention and signature compatibility
// against the (possibly bitcast) callee, noalias/sret argument constraints,
// tail-call restrictions, and a handful of intrinsics with extra rules.
void Lint::visitCallSite(CallSite CS) {
  Instruction &I = *CS.getInstruction();
  Value *Callee = CS.getCalledValue();
  const DataLayout &DL = CS->getModule()->getDataLayout();

  visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
                       0, nullptr, MemRef::Callee);

  // Only perform the signature checks if we can see through to an actual
  // Function (e.g. through bitcasts of the callee).
  if (Function *F = dyn_cast<Function>(findValue(Callee, DL,
                                                 /*OffsetOk=*/false))) {
    Assert(CS.getCallingConv() == F->getCallingConv(),
           "Undefined behavior: Caller and callee calling convention differ",
           &I);

    FunctionType *FT = F->getFunctionType();
    unsigned NumActualArgs = CS.arg_size();

    Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
                          : FT->getNumParams() == NumActualArgs,
           "Undefined behavior: Call argument count mismatches callee "
           "argument count",
           &I);

    Assert(FT->getReturnType() == I.getType(),
           "Undefined behavior: Call return type mismatches "
           "callee return type",
           &I);

    // Check argument types (in case the callee was casted) and attributes.
    // TODO: Verify that caller and callee attributes are compatible.
    Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
    CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
    for (; AI != AE; ++AI) {
      Value *Actual = *AI;
      if (PI != PE) {
        Argument *Formal = PI++;
        Assert(Formal->getType() == Actual->getType(),
               "Undefined behavior: Call argument type mismatches "
               "callee parameter type",
               &I);

        // Check that noalias arguments don't alias other arguments. This is
        // not fully precise because we don't know the sizes of the dereferenced
        // memory regions.
        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
            if (AI != BI && (*BI)->getType()->isPointerTy()) {
              AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
              Assert(Result != AliasAnalysis::MustAlias &&
                     Result != AliasAnalysis::PartialAlias,
                     "Unusual: noalias argument aliases another argument", &I);
            }

        // Check that an sret argument points to valid memory.
        if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
          Type *Ty =
            cast<PointerType>(Formal->getType())->getElementType();
          visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
                               DL.getABITypeAlignment(Ty), Ty,
                               MemRef::Read | MemRef::Write);
        }
      }
    }
  }

  // A tail call may not reference the caller's stack frame, so passing a
  // pointer to an alloca (even indirectly) is undefined behavior.
  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
    for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
         AI != AE; ++AI) {
      Value *Obj = findValue(*AI, DL, /*OffsetOk=*/true);
      Assert(!isa<AllocaInst>(Obj),
             "Undefined behavior: Call with \"tail\" keyword references "
             "alloca",
             &I);
    }


  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
    switch (II->getIntrinsicID()) {
    default: break;

    // TODO: Check more intrinsics

    case Intrinsic::memcpy: {
      MemCpyInst *MCI = cast<MemCpyInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize,
                           MCI->getAlignment(), nullptr,
                           MemRef::Write);
      visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize,
                           MCI->getAlignment(), nullptr,
                           MemRef::Read);

      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
      // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
      // Only a constant length that fits in 32 bits is used; otherwise the
      // query falls back to Size == 0 (unknown).
      uint64_t Size = 0;
      if (const ConstantInt *Len =
              dyn_cast<ConstantInt>(findValue(MCI->getLength(), DL,
                                              /*OffsetOk=*/false)))
        if (Len->getValue().isIntN(32))
          Size = Len->getValue().getZExtValue();
      Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
                 AliasAnalysis::MustAlias,
             "Undefined behavior: memcpy source and destination overlap", &I);
      break;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MMI = cast<MemMoveInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize,
                           MMI->getAlignment(), nullptr,
                           MemRef::Write);
      visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize,
                           MMI->getAlignment(), nullptr,
                           MemRef::Read);
      break;
    }
    case Intrinsic::memset: {
      MemSetInst *MSI = cast<MemSetInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize,
                           MSI->getAlignment(), nullptr,
                           MemRef::Write);
      break;
    }

    case Intrinsic::vastart:
      Assert(I.getParent()->getParent()->isVarArg(),
             "Undefined behavior: va_start called in a non-varargs function",
             &I);

      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
                           0, nullptr, MemRef::Read | MemRef::Write);
      break;
    case Intrinsic::vacopy:
      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
                           0, nullptr, MemRef::Write);
      visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize,
                           0, nullptr, MemRef::Read);
      break;
    case Intrinsic::vaend:
      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
                           0, nullptr, MemRef::Read | MemRef::Write);
      break;

    case Intrinsic::stackrestore:
      // Stackrestore doesn't read or write memory, but it sets the
      // stack pointer, which the compiler may read from or write to
      // at any time, so check it for both readability and writeability.
      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
                           0, nullptr, MemRef::Read | MemRef::Write);
      break;

    case Intrinsic::eh_begincatch:
      visitEHBeginCatch(II);
      break;
    case Intrinsic::eh_endcatch:
      visitEHEndCatch(II);
      break;
    }
}

void Lint::visitCallInst(CallInst &I) {
  return visitCallSite(&I);
}

void Lint::visitInvokeInst(InvokeInst &I) {
  return visitCallSite(&I);
}

void Lint::visitReturnInst(ReturnInst &I) {
  Function *F = I.getParent()->getParent();
  Assert(!F->doesNotReturn(),
         "Unusual: Return statement in function with noreturn attribute", &I);

  // Returning the address of a local alloca is a likely dangling pointer.
  if (Value *V = I.getReturnValue()) {
    Value *Obj =
        findValue(V, F->getParent()->getDataLayout(), /*OffsetOk=*/true);
    Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
  }
}

// TODO: Check that the reference is in bounds.
// TODO: Check readnone/readonly function attributes.
//
// Core memory-reference checker shared by all pointer-using visitors.
// Ptr is the accessed pointer, Size the access size in bytes (or
// AliasAnalysis::UnknownSize), Align the stated alignment (0 = unspecified),
// Ty the accessed type (may be null), and Flags a mask of MemRef bits.
void Lint::visitMemoryReference(Instruction &I,
                                Value *Ptr, uint64_t Size, unsigned Align,
                                Type *Ty, unsigned Flags) {
  // If no memory is being referenced, it doesn't matter if the pointer
  // is valid.
  if (Size == 0)
    return;

  Value *UnderlyingObject =
      findValue(Ptr, I.getModule()->getDataLayout(), /*OffsetOk=*/true);
  Assert(!isa<ConstantPointerNull>(UnderlyingObject),
         "Undefined behavior: Null pointer dereference", &I);
  Assert(!isa<UndefValue>(UnderlyingObject),
         "Undefined behavior: Undef pointer dereference", &I);
  Assert(!isa<ConstantInt>(UnderlyingObject) ||
         !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
         "Unusual: All-ones pointer dereference", &I);
  Assert(!isa<ConstantInt>(UnderlyingObject) ||
         !cast<ConstantInt>(UnderlyingObject)->isOne(),
         "Unusual: Address one pointer dereference", &I);

  if (Flags & MemRef::Write) {
    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
      Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
             &I);
    Assert(!isa<Function>(UnderlyingObject) &&
           !isa<BlockAddress>(UnderlyingObject),
           "Undefined behavior: Write to text section", &I);
  }
  if (Flags & MemRef::Read) {
    Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
           &I);
    Assert(!isa<BlockAddress>(UnderlyingObject),
           "Undefined behavior: Load from block address", &I);
  }
  if (Flags & MemRef::Callee) {
    Assert(!isa<BlockAddress>(UnderlyingObject),
           "Undefined behavior: Call to block address", &I);
  }
  if (Flags & MemRef::Branchee) {
    Assert(!isa<Constant>(UnderlyingObject) ||
           isa<BlockAddress>(UnderlyingObject),
           "Undefined behavior: Branch to non-blockaddress", &I);
  }

  // Check for buffer overflows and misalignment.
  // Only handles memory references that read/write something simple like an
  // alloca instruction or a global variable.
  auto &DL = I.getModule()->getDataLayout();
  int64_t Offset = 0;
  if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) {
    // OK, so the access is to a constant offset from Ptr. Check that Ptr is
    // something we can handle and if so extract the size of this base object
    // along with its alignment.
    uint64_t BaseSize = AliasAnalysis::UnknownSize;
    unsigned BaseAlign = 0;

    if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
      Type *ATy = AI->getAllocatedType();
      // Variable-length allocas have unknowable size; leave UnknownSize.
      if (!AI->isArrayAllocation() && ATy->isSized())
        BaseSize = DL.getTypeAllocSize(ATy);
      BaseAlign = AI->getAlignment();
      if (BaseAlign == 0 && ATy->isSized())
        BaseAlign = DL.getABITypeAlignment(ATy);
    } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
      // If the global may be defined differently in another compilation unit
      // then don't warn about funky memory accesses.
      if (GV->hasDefinitiveInitializer()) {
        Type *GTy = GV->getType()->getElementType();
        if (GTy->isSized())
          BaseSize = DL.getTypeAllocSize(GTy);
        BaseAlign = GV->getAlignment();
        if (BaseAlign == 0 && GTy->isSized())
          BaseAlign = DL.getABITypeAlignment(GTy);
      }
    }

    // Accesses from before the start or after the end of the object are not
    // defined.
    Assert(Size == AliasAnalysis::UnknownSize ||
           BaseSize == AliasAnalysis::UnknownSize ||
           (Offset >= 0 && Offset + Size <= BaseSize),
           "Undefined behavior: Buffer overflow", &I);

    // Accesses that say that the memory is more aligned than it is are not
    // defined.
    if (Align == 0 && Ty && Ty->isSized())
      Align = DL.getABITypeAlignment(Ty);
    Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
           "Undefined behavior: Memory reference address is misaligned", &I);
  }
}

void Lint::visitLoadInst(LoadInst &I) {
  visitMemoryReference(I, I.getPointerOperand(),
                       AA->getTypeStoreSize(I.getType()), I.getAlignment(),
                       I.getType(), MemRef::Read);
}

void Lint::visitStoreInst(StoreInst &I) {
  visitMemoryReference(I, I.getPointerOperand(),
                       AA->getTypeStoreSize(I.getOperand(0)->getType()),
                       I.getAlignment(),
                       I.getOperand(0)->getType(), MemRef::Write);
}

void Lint::visitXor(BinaryOperator &I) {
  Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
         "Undefined result: xor(undef, undef)", &I);
}

void Lint::visitSub(BinaryOperator &I) {
  Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
         "Undefined result: sub(undef, undef)", &I);
}

// The three shift visitors below warn when a constant shift amount is >= the
// bit width of the shifted type, which yields an undefined result.
void Lint::visitLShr(BinaryOperator &I) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(
          findValue(I.getOperand(1), I.getModule()->getDataLayout(),
                    /*OffsetOk=*/false)))
    Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
           "Undefined result: Shift count out of range", &I);
}

void Lint::visitAShr(BinaryOperator &I) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(
          I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false)))
    Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
           "Undefined result: Shift count out of range", &I);
}

void Lint::visitShl(BinaryOperator &I) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(
          I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false)))
    Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
           "Undefined result: Shift count out of range", &I);
}

// Recursively check that every path leading to BB passes through a
// landingpad block. VisitedBlocks guards against CFG cycles; blocks already
// visited are treated as satisfied.
static bool
allPredsCameFromLandingPad(BasicBlock *BB,
                           SmallSet<BasicBlock *, 4> &VisitedBlocks) {
  VisitedBlocks.insert(BB);
  if (BB->isLandingPad())
    return true;
  // If we find a block with no predecessors, the search failed.
  if (pred_empty(BB))
    return false;
  for (BasicBlock *Pred : predecessors(BB)) {
    if (VisitedBlocks.count(Pred))
      continue;
    if (!allPredsCameFromLandingPad(Pred, VisitedBlocks))
      return false;
  }
  return true;
}

// Recursively check that every path forward from BB (starting the scan at
// InstBegin) reaches a call to llvm.eh.endcatch before falling off the CFG.
// If a second llvm.eh.begincatch is found first, it is reported through
// *SecondBeginCatch and the search fails.
static bool
allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin,
                           IntrinsicInst **SecondBeginCatch,
                           SmallSet<BasicBlock *, 4> &VisitedBlocks) {
  VisitedBlocks.insert(BB);
  for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) {
    IntrinsicInst *IC = dyn_cast<IntrinsicInst>(I);
    if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch)
      return true;
    // If we find another begincatch while looking for an endcatch,
    // that's also an error.
    if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) {
      *SecondBeginCatch = IC;
      return false;
    }
  }

  // If we reach a block with no successors while searching, the
  // search has failed.
  if (succ_empty(BB))
    return false;
  // Otherwise, search all of the successors.
  for (BasicBlock *Succ : successors(BB)) {
    if (VisitedBlocks.count(Succ))
      continue;
    if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch,
                                    VisitedBlocks))
      return false;
  }
  return true;
}

void Lint::visitEHBeginCatch(IntrinsicInst *II) {
  // The checks in this function make a potentially dubious assumption about
  // the CFG, namely that any block involved in a catch is only used for the
  // catch. This will very likely be true of IR generated by a front end,
  // but it may cease to be true, for example, if the IR is run through a
  // pass which combines similar blocks.
  //
  // In general, if we encounter a block that isn't dominated by the catch
  // block while we are searching the catch block's successors for a call
  // to end catch intrinsic, then it is possible that it will be legal for
  // a path through this block to never reach a call to llvm.eh.endcatch.
  // An analogous statement could be made about our search for a landing
  // pad among the catch block's predecessors.
  //
  // What is actually required is that no path is possible at runtime that
  // reaches a call to llvm.eh.begincatch without having previously visited
  // a landingpad instruction and that no path is possible at runtime that
  // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch
  // (mentally adjusting for the fact that in reality these calls will be
  // removed before code generation).
  //
  // Because this is a lint check, we take a pessimistic approach and warn if
  // the control flow is potentially incorrect.

  SmallSet<BasicBlock *, 4> VisitedBlocks;
  BasicBlock *CatchBB = II->getParent();

  // The begin catch must occur in a landing pad block or all paths
  // to it must have come from a landing pad.
  Assert(allPredsCameFromLandingPad(CatchBB, VisitedBlocks),
         "llvm.eh.begincatch may be reachable without passing a landingpad",
         II);

  // Reset the visited block list.
  VisitedBlocks.clear();

  IntrinsicInst *SecondBeginCatch = nullptr;

  // This has to be called before it is asserted. Otherwise, the first assert
  // below can never be hit.
  bool EndCatchFound = allSuccessorsReachEndCatch(
      CatchBB, std::next(static_cast<BasicBlock::iterator>(II)),
      &SecondBeginCatch, VisitedBlocks);
  Assert(
      SecondBeginCatch == nullptr,
      "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch",
      II, SecondBeginCatch);
  Assert(EndCatchFound,
         "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch",
         II);
}

// Reverse-walk the CFG from InstRbegin, checking that every path into BB
// executed llvm.eh.begincatch. Finding a second llvm.eh.endcatch first is an
// error reported through *SecondEndCatch; hitting a landingpad instruction
// means begincatch was never called on that path.
static bool allPredCameFromBeginCatch(
    BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin,
    IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) {
  VisitedBlocks.insert(BB);
  // Look for a begincatch in this block.
  for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE;
       ++RI) {
    IntrinsicInst *IC = dyn_cast<IntrinsicInst>(&*RI);
    if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch)
      return true;
    // If we find another end catch before we find a begin catch, that's
    // an error.
    if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) {
      *SecondEndCatch = IC;
      return false;
    }
    // If we encounter a landingpad instruction, the search failed.
    if (isa<LandingPadInst>(*RI))
      return false;
  }
  // If while searching we find a block with no predecessors,
  // the search failed.
  if (pred_empty(BB))
    return false;
  // Search any predecessors we haven't seen before.
  for (BasicBlock *Pred : predecessors(BB)) {
    if (VisitedBlocks.count(Pred))
      continue;
    if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch,
                                   VisitedBlocks))
      return false;
  }
  return true;
}

void Lint::visitEHEndCatch(IntrinsicInst *II) {
  // The check in this function makes a potentially dubious assumption about
  // the CFG, namely that any block involved in a catch is only used for the
  // catch. This will very likely be true of IR generated by a front end,
  // but it may cease to be true, for example, if the IR is run through a
  // pass which combines similar blocks.
  //
  // In general, if we encounter a block that isn't post-dominated by the
  // end catch block while we are searching the end catch block's predecessors
  // for a call to the begin catch intrinsic, then it is possible that it will
  // be legal for a path to reach the end catch block without ever having
  // called llvm.eh.begincatch.
  //
  // What is actually required is that no path is possible at runtime that
  // reaches a call to llvm.eh.endcatch without having previously visited
  // a call to llvm.eh.begincatch (mentally adjusting for the fact that in
  // reality these calls will be removed before code generation).
  //
  // Because this is a lint check, we take a pessimistic approach and warn if
  // the control flow is potentially incorrect.

  BasicBlock *EndCatchBB = II->getParent();

  // All paths to the end catch call must pass through a begin catch call.

  // If llvm.eh.begincatch wasn't called in the current block, we'll use this
  // lambda to recursively look for it in predecessors.
  SmallSet<BasicBlock *, 4> VisitedBlocks;
  IntrinsicInst *SecondEndCatch = nullptr;

  // This has to be called before it is asserted. Otherwise, the first assert
  // below can never be hit.
  bool BeginCatchFound =
      allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II),
                                &SecondEndCatch, VisitedBlocks);
  Assert(
      SecondEndCatch == nullptr,
      "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch",
      II, SecondEndCatch);
  Assert(BeginCatchFound,
         "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch",
         II);
}

// Conservatively decide whether V is known (or could be) zero, for use by the
// division/remainder checks. For vectors, returns true if any element is
// known to be zero or undef, since that element's division is undefined.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
                   AssumptionCache *AC) {
  // Assume undef could be zero.
  if (isa<UndefValue>(V))
    return true;

  VectorType *VecTy = dyn_cast<VectorType>(V->getType());
  if (!VecTy) {
    unsigned BitWidth = V->getType()->getIntegerBitWidth();
    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
    computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC,
                     dyn_cast<Instruction>(V), DT);
    return KnownZero.isAllOnesValue();
  }

  // Per-component check doesn't work with zeroinitializer
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    return false;

  if (C->isZeroValue())
    return true;

  // For a vector, KnownZero will only be true if all values are zero, so check
  // this per component
  unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth();
  for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
    Constant *Elem = C->getAggregateElement(I);
    if (isa<UndefValue>(Elem))
      return true;

    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
    computeKnownBits(Elem, KnownZero, KnownOne, DL);
    if (KnownZero.isAllOnesValue())
      return true;
  }

  return false;
}

void Lint::visitSDiv(BinaryOperator &I) {
  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
         "Undefined behavior: Division by zero", &I);
}

void Lint::visitUDiv(BinaryOperator &I) {
  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
         "Undefined behavior: Division by zero", &I);
}

void Lint::visitSRem(BinaryOperator &I) {
  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
         "Undefined behavior: Division by zero", &I);
}

void Lint::visitURem(BinaryOperator &I) {
  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
         "Undefined behavior: Division by zero", &I);
}

void Lint::visitAllocaInst(AllocaInst &I) {
  if (isa<ConstantInt>(I.getArraySize()))
    // This isn't undefined behavior, it's just an obvious pessimization.
    Assert(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
           "Pessimization: Static alloca outside of entry block", &I);

  // TODO: Check for an unusual size (MSB set?)
}

void Lint::visitVAArgInst(VAArgInst &I) {
  visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0,
                       nullptr, MemRef::Read | MemRef::Write);
}

void Lint::visitIndirectBrInst(IndirectBrInst &I) {
  visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0,
                       nullptr, MemRef::Branchee);

  Assert(I.getNumDestinations() != 0,
         "Undefined behavior: indirectbr with no destinations", &I);
}

void Lint::visitExtractElementInst(ExtractElementInst &I) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(
          findValue(I.getIndexOperand(), I.getModule()->getDataLayout(),
                    /*OffsetOk=*/false)))
    Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
           "Undefined result: extractelement index out of range", &I);
}

void Lint::visitInsertElementInst(InsertElementInst &I) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(
          findValue(I.getOperand(2), I.getModule()->getDataLayout(),
                    /*OffsetOk=*/false)))
    Assert(CI->getValue().ult(I.getType()->getNumElements()),
           "Undefined result: insertelement index out of range", &I);
}

void Lint::visitUnreachableInst(UnreachableInst &I) {
  // This isn't undefined behavior, it's merely suspicious.
  Assert(&I == I.getParent()->begin() ||
         std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(),
         "Unusual: unreachable immediately preceded by instruction without "
         "side effects",
         &I);
}

/// findValue - Look through bitcasts and simple memory reference patterns
/// to identify an equivalent, but more informative, value. If OffsetOk
/// is true, look through getelementptrs with non-zero offsets too.
///
/// Most analysis passes don't require this logic, because instcombine
/// will simplify most of these kinds of things away. But it's a goal of
/// this Lint pass to be useful even on non-optimized IR.
Value *Lint::findValue(Value *V, const DataLayout &DL, bool OffsetOk) const {
  SmallPtrSet<Value *, 4> Visited;
  return findValueImpl(V, DL, OffsetOk, Visited);
}

/// findValueImpl - Implementation helper for findValue. Visited tracks the
/// values seen on the current chain to break cycles; a self-referential
/// value is mapped to undef.
Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
                           SmallPtrSetImpl<Value *> &Visited) const {
  // Detect self-referential values.
  if (!Visited.insert(V).second)
    return UndefValue::get(V->getType());

  // TODO: Look through sext or zext cast, when the result is known to
  // be interpreted as signed or unsigned, respectively.
  // TODO: Look through eliminable cast pairs.
  // TODO: Look through calls with unique return values.
  // TODO: Look through vector insert/extract/shuffle.
  V = OffsetOk ? GetUnderlyingObject(V, DL) : V->stripPointerCasts();
  if (LoadInst *L = dyn_cast<LoadInst>(V)) {
    // Walk backwards (through unique predecessors) looking for the value
    // this load would observe.
    BasicBlock::iterator BBI = L;
    BasicBlock *BB = L->getParent();
    SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
    for (;;) {
      if (!VisitedBlocks.insert(BB).second)
        break;
      if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
                                              BB, BBI, 6, AA))
        return findValueImpl(U, DL, OffsetOk, Visited);
      if (BBI != BB->begin()) break;
      BB = BB->getUniquePredecessor();
      if (!BB) break;
      BBI = BB->end();
    }
  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
    if (Value *W = PN->hasConstantValue())
      if (W != V)
        return findValueImpl(W, DL, OffsetOk, Visited);
  } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
    if (CI->isNoopCast(DL))
      return findValueImpl(CI->getOperand(0), DL, OffsetOk, Visited);
  } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
    if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
                                    Ex->getIndices()))
      if (W != V)
        return findValueImpl(W, DL, OffsetOk, Visited);
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    // Same as above, but for ConstantExpr instead of Instruction.
    if (Instruction::isCast(CE->getOpcode())) {
      if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
                               CE->getOperand(0)->getType(), CE->getType(),
                               DL.getIntPtrType(V->getType())))
        return findValueImpl(CE->getOperand(0), DL, OffsetOk, Visited);
    } else if (CE->getOpcode() == Instruction::ExtractValue) {
      ArrayRef<unsigned> Indices = CE->getIndices();
      if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
        if (W != V)
          return findValueImpl(W, DL, OffsetOk, Visited);
    }
  }

  // As a last resort, try SimplifyInstruction or constant folding.
  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
    if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC))
      return findValueImpl(W, DL, OffsetOk, Visited);
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI))
      if (W != V)
        return findValueImpl(W, DL, OffsetOk, Visited);
  }

  return V;
}

//===----------------------------------------------------------------------===//
// Implement the public interfaces to this file...
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createLintPass() {
  return new Lint();
}

/// lintFunction - Check a function for errors, printing messages on stderr.
///
void llvm::lintFunction(const Function &f) {
  Function &F = const_cast<Function&>(f);
  assert(!F.isDeclaration() && "Cannot lint external functions");

  // The pass manager takes ownership of the Lint instance.
  legacy::FunctionPassManager FPM(F.getParent());
  Lint *V = new Lint();
  FPM.add(V);
  FPM.run(F);
}

/// lintModule - Check a module for errors, printing messages on stderr.
///
void llvm::lintModule(const Module &M) {
  // The pass manager takes ownership of the Lint instance.
  legacy::PassManager PM;
  Lint *V = new Lint();
  PM.add(V);
  PM.run(const_cast<Module&>(M));
}