//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass statically checks for common and easily-identified constructs
// which produce undefined or likely unintended behavior in LLVM IR.
//
// It is not a guarantee of correctness, for two reasons. First, it isn't
// comprehensive. There are checks which could be done statically which are
// not yet implemented. Some of these are indicated by TODO comments, but
// those aren't comprehensive either. Second, many conditions cannot be
// checked statically. This pass does no dynamic instrumentation, so it
// can't check for all possible problems.
//
// Another limitation is that it assumes all code will be executed. A store
// through a null pointer in a basic block which is never reached is harmless,
// but this pass will warn about it anyway. This is the main reason why most
// of these checks live here instead of in the Verifier pass.
//
// Optimization passes may make conditions that this pass checks for more or
// less obvious. If an optimization pass appears to be introducing a warning,
// it may be that the optimization pass is merely exposing an existing
// condition in the code.
//
// This code may be run before instcombine. In many cases, instcombine checks
// for the same kinds of things and turns instructions with undefined behavior
// into unreachable (or equivalent). Because of this, this pass makes some
// effort to look through bitcasts and so on.
//
//===----------------------------------------------------------------------===//
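
// For reference, one way to exercise this pass from the command line (assuming
// a build of the 'opt' tool from this tree) is:
//
//   opt -lint -disable-output input.ll
//
// It can also be driven programmatically via the lintFunction() and
// lintModule() helpers declared in llvm/Analysis/Lint.h and defined at the
// bottom of this file.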

#include "llvm/Analysis/Lint.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
  namespace MemRef {
    static const unsigned Read     = 1;
    static const unsigned Write    = 2;
    static const unsigned Callee   = 4;
    static const unsigned Branchee = 8;
  }
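
  // These constants are combined into the bitmask passed as the 'Flags'
  // argument of visitMemoryReference(); e.g. a va_list that is both read and
  // updated is visited with MemRef::Read | MemRef::Write.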

  class Lint : public FunctionPass, public InstVisitor<Lint> {
    friend class InstVisitor<Lint>;

    void visitFunction(Function &F);

    void visitCallSite(CallSite CS);
    void visitMemoryReference(Instruction &I, Value *Ptr,
                              uint64_t Size, unsigned Align,
                              Type *Ty, unsigned Flags);
    void visitEHBeginCatch(IntrinsicInst *II);
    void visitEHEndCatch(IntrinsicInst *II);

    void visitCallInst(CallInst &I);
    void visitInvokeInst(InvokeInst &I);
    void visitReturnInst(ReturnInst &I);
    void visitLoadInst(LoadInst &I);
    void visitStoreInst(StoreInst &I);
    void visitXor(BinaryOperator &I);
    void visitSub(BinaryOperator &I);
    void visitLShr(BinaryOperator &I);
    void visitAShr(BinaryOperator &I);
    void visitShl(BinaryOperator &I);
    void visitSDiv(BinaryOperator &I);
    void visitUDiv(BinaryOperator &I);
    void visitSRem(BinaryOperator &I);
    void visitURem(BinaryOperator &I);
    void visitAllocaInst(AllocaInst &I);
    void visitVAArgInst(VAArgInst &I);
    void visitIndirectBrInst(IndirectBrInst &I);
    void visitExtractElementInst(ExtractElementInst &I);
    void visitInsertElementInst(InsertElementInst &I);
    void visitUnreachableInst(UnreachableInst &I);

    Value *findValue(Value *V, const DataLayout &DL, bool OffsetOk) const;
    Value *findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
                         SmallPtrSetImpl<Value *> &Visited) const;

  public:
    Module *Mod;
    AliasAnalysis *AA;
    AssumptionCache *AC;
    DominatorTree *DT;
    TargetLibraryInfo *TLI;

    std::string Messages;
    raw_string_ostream MessagesStr;

    static char ID; // Pass identification, replacement for typeid
    Lint() : FunctionPass(ID), MessagesStr(Messages) {
      initializeLintPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesAll();
      AU.addRequired<AliasAnalysis>();
      AU.addRequired<AssumptionCacheTracker>();
      AU.addRequired<TargetLibraryInfoWrapperPass>();
      AU.addRequired<DominatorTreeWrapperPass>();
    }
    void print(raw_ostream &O, const Module *M) const override {}

    void WriteValues(ArrayRef<const Value *> Vs) {
      for (const Value *V : Vs) {
        if (!V)
          continue;
        if (isa<Instruction>(V)) {
          MessagesStr << *V << '\n';
        } else {
          V->printAsOperand(MessagesStr, true, Mod);
          MessagesStr << '\n';
        }
      }
    }

    /// \brief A check failed, so print out the condition and the message.
    ///
    /// This provides a nice place to put a breakpoint if you want to see why
    /// something is not correct.
    void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }

    /// \brief A check failed (with values to print).
    ///
    /// This calls the Message-only version so that the above is easier to set
    /// a breakpoint on.
    template <typename T1, typename... Ts>
    void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) {
      CheckFailed(Message);
      WriteValues({V1, Vs...});
    }
  };
}

char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
                      false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
                    false, true)

// Assert - We know that cond should be true; if not, print an error message.
#define Assert(C, ...) \
    do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0)
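
// A hypothetical use, for illustration only:
//
//   Assert(Ptr != nullptr, "Undefined behavior: Null pointer", &I);
//
// Note that a failed check returns from the current visit method, so any
// remaining checks for that instruction are skipped.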

// Lint::runOnFunction - This is the main Analysis entry point for a
// function.
//
bool Lint::runOnFunction(Function &F) {
  Mod = F.getParent();
  AA = &getAnalysis<AliasAnalysis>();
  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  visit(F);
  dbgs() << MessagesStr.str();
  Messages.clear();
  return false;
}

void Lint::visitFunction(Function &F) {
  // This isn't undefined behavior, it's just a little unusual, and it's a
  // fairly common mistake to neglect to name a function.
  Assert(F.hasName() || F.hasLocalLinkage(),
         "Unusual: Unnamed function with non-local linkage", &F);

  // TODO: Check for irreducible control flow.
}

void Lint::visitCallSite(CallSite CS) {
  Instruction &I = *CS.getInstruction();
  Value *Callee = CS.getCalledValue();
  const DataLayout &DL = CS->getModule()->getDataLayout();

  visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
                       0, nullptr, MemRef::Callee);

  if (Function *F = dyn_cast<Function>(findValue(Callee, DL,
                                                 /*OffsetOk=*/false))) {
    Assert(CS.getCallingConv() == F->getCallingConv(),
           "Undefined behavior: Caller and callee calling convention differ",
           &I);

    FunctionType *FT = F->getFunctionType();
    unsigned NumActualArgs = CS.arg_size();

    Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
                          : FT->getNumParams() == NumActualArgs,
           "Undefined behavior: Call argument count mismatches callee "
           "argument count",
           &I);

    Assert(FT->getReturnType() == I.getType(),
           "Undefined behavior: Call return type mismatches "
           "callee return type",
           &I);

    // Check argument types (in case the callee was cast) and attributes.
    // TODO: Verify that caller and callee attributes are compatible.
    Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
    CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
    for (; AI != AE; ++AI) {
      Value *Actual = *AI;
      if (PI != PE) {
        Argument *Formal = PI++;
        Assert(Formal->getType() == Actual->getType(),
               "Undefined behavior: Call argument type mismatches "
               "callee parameter type",
               &I);

        // Check that noalias arguments don't alias other arguments. This is
        // not fully precise because we don't know the sizes of the dereferenced
        // memory regions.
        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
            if (AI != BI && (*BI)->getType()->isPointerTy()) {
              AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
              Assert(Result != AliasAnalysis::MustAlias &&
                         Result != AliasAnalysis::PartialAlias,
                     "Unusual: noalias argument aliases another argument", &I);
            }

        // Check that an sret argument points to valid memory.
        if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
          Type *Ty =
            cast<PointerType>(Formal->getType())->getElementType();
          visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
                               DL.getABITypeAlignment(Ty), Ty,
                               MemRef::Read | MemRef::Write);
        }
      }
    }
  }

  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
    for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
         AI != AE; ++AI) {
      Value *Obj = findValue(*AI, DL, /*OffsetOk=*/true);
      Assert(!isa<AllocaInst>(Obj),
             "Undefined behavior: Call with \"tail\" keyword references "
             "alloca",
             &I);
    }
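
  // The preceding tail-call check is intended to flag IR like the following
  // (function and value names here are illustrative only):
  //
  //   %buf = alloca i32
  //   %r = tail call i32 @use(i32* %buf)
  //
  // because a call marked "tail" must not reference memory in the caller's
  // own stack frame.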


  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
    switch (II->getIntrinsicID()) {
    default: break;

    // TODO: Check more intrinsics

    case Intrinsic::memcpy: {
      MemCpyInst *MCI = cast<MemCpyInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize,
                           MCI->getAlignment(), nullptr,
                           MemRef::Write);
      visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize,
                           MCI->getAlignment(), nullptr,
                           MemRef::Read);

      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
      // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
      uint64_t Size = 0;
      if (const ConstantInt *Len =
              dyn_cast<ConstantInt>(findValue(MCI->getLength(), DL,
                                              /*OffsetOk=*/false)))
        if (Len->getValue().isIntN(32))
          Size = Len->getValue().getZExtValue();
      Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
                 AliasAnalysis::MustAlias,
             "Undefined behavior: memcpy source and destination overlap", &I);
      break;
    }
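    // For illustration, a roughly sketched call that the overlap check above
    // (in the memcpy case) rejects, since source and destination are trivially
    // the same pointer:
    //
    //   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %p, i32 16,
    //                                        i32 1, i1 false)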
    case Intrinsic::memmove: {
      MemMoveInst *MMI = cast<MemMoveInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize,
                           MMI->getAlignment(), nullptr,
                           MemRef::Write);
      visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize,
                           MMI->getAlignment(), nullptr,
                           MemRef::Read);
      break;
    }
    case Intrinsic::memset: {
      MemSetInst *MSI = cast<MemSetInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize,
                           MSI->getAlignment(), nullptr,
                           MemRef::Write);
      break;
    }

    case Intrinsic::vastart:
      Assert(I.getParent()->getParent()->isVarArg(),
             "Undefined behavior: va_start called in a non-varargs function",
             &I);

      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
                           0, nullptr, MemRef::Read | MemRef::Write);
      break;
    case Intrinsic::vacopy:
      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
                           0, nullptr, MemRef::Write);
      visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize,
                           0, nullptr, MemRef::Read);
      break;
    case Intrinsic::vaend:
      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
                           0, nullptr, MemRef::Read | MemRef::Write);
      break;

    case Intrinsic::stackrestore:
      // Stackrestore doesn't read or write memory, but it sets the
      // stack pointer, which the compiler may read from or write to
      // at any time, so check it for both readability and writeability.
      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
                           0, nullptr, MemRef::Read | MemRef::Write);
      break;

    case Intrinsic::eh_begincatch:
      visitEHBeginCatch(II);
      break;
    case Intrinsic::eh_endcatch:
      visitEHEndCatch(II);
      break;
    }
}

void Lint::visitCallInst(CallInst &I) {
  return visitCallSite(&I);
}

void Lint::visitInvokeInst(InvokeInst &I) {
  return visitCallSite(&I);
}

void Lint::visitReturnInst(ReturnInst &I) {
  Function *F = I.getParent()->getParent();
  Assert(!F->doesNotReturn(),
         "Unusual: Return statement in function with noreturn attribute", &I);

  if (Value *V = I.getReturnValue()) {
    Value *Obj =
        findValue(V, F->getParent()->getDataLayout(), /*OffsetOk=*/true);
    Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
  }
}

// TODO: Check that the reference is in bounds.
// TODO: Check readnone/readonly function attributes.
void Lint::visitMemoryReference(Instruction &I,
                                Value *Ptr, uint64_t Size, unsigned Align,
                                Type *Ty, unsigned Flags) {
  // If no memory is being referenced, it doesn't matter if the pointer
  // is valid.
  if (Size == 0)
    return;

  Value *UnderlyingObject =
      findValue(Ptr, I.getModule()->getDataLayout(), /*OffsetOk=*/true);
  Assert(!isa<ConstantPointerNull>(UnderlyingObject),
         "Undefined behavior: Null pointer dereference", &I);
  Assert(!isa<UndefValue>(UnderlyingObject),
         "Undefined behavior: Undef pointer dereference", &I);
  Assert(!isa<ConstantInt>(UnderlyingObject) ||
             !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
         "Unusual: All-ones pointer dereference", &I);
  Assert(!isa<ConstantInt>(UnderlyingObject) ||
             !cast<ConstantInt>(UnderlyingObject)->isOne(),
         "Unusual: Address one pointer dereference", &I);

  if (Flags & MemRef::Write) {
    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
      Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
             &I);
    Assert(!isa<Function>(UnderlyingObject) &&
               !isa<BlockAddress>(UnderlyingObject),
           "Undefined behavior: Write to text section", &I);
  }
  if (Flags & MemRef::Read) {
    Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
           &I);
    Assert(!isa<BlockAddress>(UnderlyingObject),
           "Undefined behavior: Load from block address", &I);
  }
  if (Flags & MemRef::Callee) {
    Assert(!isa<BlockAddress>(UnderlyingObject),
           "Undefined behavior: Call to block address", &I);
  }
  if (Flags & MemRef::Branchee) {
    Assert(!isa<Constant>(UnderlyingObject) ||
               isa<BlockAddress>(UnderlyingObject),
           "Undefined behavior: Branch to non-blockaddress", &I);
  }

  // Check for buffer overflows and misalignment.
  // Only handles memory references that read/write something simple like an
  // alloca instruction or a global variable.
  auto &DL = I.getModule()->getDataLayout();
  int64_t Offset = 0;
  if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) {
    // OK, so the access is to a constant offset from Ptr.  Check that Ptr is
    // something we can handle and if so extract the size of this base object
    // along with its alignment.
    uint64_t BaseSize = AliasAnalysis::UnknownSize;
    unsigned BaseAlign = 0;

    if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
      Type *ATy = AI->getAllocatedType();
      if (!AI->isArrayAllocation() && ATy->isSized())
        BaseSize = DL.getTypeAllocSize(ATy);
      BaseAlign = AI->getAlignment();
      if (BaseAlign == 0 && ATy->isSized())
        BaseAlign = DL.getABITypeAlignment(ATy);
    } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
      // If the global may be defined differently in another compilation unit
      // then don't warn about funky memory accesses.
      if (GV->hasDefinitiveInitializer()) {
        Type *GTy = GV->getType()->getElementType();
        if (GTy->isSized())
          BaseSize = DL.getTypeAllocSize(GTy);
        BaseAlign = GV->getAlignment();
        if (BaseAlign == 0 && GTy->isSized())
          BaseAlign = DL.getABITypeAlignment(GTy);
      }
    }

    // Accesses from before the start or after the end of the object are not
    // defined.
    Assert(Size == AliasAnalysis::UnknownSize ||
               BaseSize == AliasAnalysis::UnknownSize ||
               (Offset >= 0 && Offset + Size <= BaseSize),
           "Undefined behavior: Buffer overflow", &I);

    // Accesses that say that the memory is more aligned than it is are not
    // defined.
    if (Align == 0 && Ty && Ty->isSized())
      Align = DL.getABITypeAlignment(Ty);
    Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
           "Undefined behavior: Memory reference address is misaligned", &I);
  }
}
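
// For illustration, the buffer-overflow check above flags accesses that run
// past the end of a known object, e.g. (illustrative IR, assuming a typical
// data layout):
//
//   %a = alloca i32
//   %p = bitcast i32* %a to i64*
//   store i64 0, i64* %p    ; 8-byte store to a 4-byte object
//
// which is reported as "Undefined behavior: Buffer overflow".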

void Lint::visitLoadInst(LoadInst &I) {
  visitMemoryReference(I, I.getPointerOperand(),
                       AA->getTypeStoreSize(I.getType()), I.getAlignment(),
                       I.getType(), MemRef::Read);
}

void Lint::visitStoreInst(StoreInst &I) {
  visitMemoryReference(I, I.getPointerOperand(),
                       AA->getTypeStoreSize(I.getOperand(0)->getType()),
                       I.getAlignment(),
                       I.getOperand(0)->getType(), MemRef::Write);
}

void Lint::visitXor(BinaryOperator &I) {
  Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
         "Undefined result: xor(undef, undef)", &I);
}
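
// Note: while xor of a value with itself folds to zero, each use of undef may
// resolve to a different value, so xor(undef, undef) (and sub(undef, undef),
// below) is not guaranteed to be zero; that is why these operand pairs are
// flagged.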

void Lint::visitSub(BinaryOperator &I) {
  Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
         "Undefined result: sub(undef, undef)", &I);
}

void Lint::visitLShr(BinaryOperator &I) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(
          findValue(I.getOperand(1), I.getModule()->getDataLayout(),
                    /*OffsetOk=*/false)))
    Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
           "Undefined result: Shift count out of range", &I);
}

void Lint::visitAShr(BinaryOperator &I) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(
          I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false)))
    Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
           "Undefined result: Shift count out of range", &I);
}

void Lint::visitShl(BinaryOperator &I) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(
          I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false)))
    Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
           "Undefined result: Shift count out of range", &I);
}
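
// For illustration, the shift checks above would flag e.g. (illustrative IR):
//
//   %r = shl i32 %x, 37    ; "Undefined result: Shift count out of range"
//
// since a shift amount greater than or equal to the bit width of the type
// yields an undefined result in LLVM IR.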

static bool
allPredsCameFromLandingPad(BasicBlock *BB,
                           SmallSet<BasicBlock *, 4> &VisitedBlocks) {
  VisitedBlocks.insert(BB);
  if (BB->isLandingPad())
    return true;
  // If we find a block with no predecessors, the search failed.
  if (pred_empty(BB))
    return false;
  for (BasicBlock *Pred : predecessors(BB)) {
    if (VisitedBlocks.count(Pred))
      continue;
    if (!allPredsCameFromLandingPad(Pred, VisitedBlocks))
      return false;
  }
  return true;
}

static bool
allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin,
                           IntrinsicInst **SecondBeginCatch,
                           SmallSet<BasicBlock *, 4> &VisitedBlocks) {
  VisitedBlocks.insert(BB);
  for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) {
    IntrinsicInst *IC = dyn_cast<IntrinsicInst>(I);
    if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch)
      return true;
    // If we find another begincatch while looking for an endcatch,
    // that's also an error.
    if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) {
      *SecondBeginCatch = IC;
      return false;
    }
  }

  // If we reach a block with no successors while searching, the
  // search has failed.
  if (succ_empty(BB))
    return false;
  // Otherwise, search all of the successors.
  for (BasicBlock *Succ : successors(BB)) {
    if (VisitedBlocks.count(Succ))
      continue;
    if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch,
                                    VisitedBlocks))
      return false;
  }
  return true;
}

void Lint::visitEHBeginCatch(IntrinsicInst *II) {
  // The checks in this function make a potentially dubious assumption about
  // the CFG, namely that any block involved in a catch is only used for the
  // catch.  This will very likely be true of IR generated by a front end,
  // but it may cease to be true, for example, if the IR is run through a
  // pass which combines similar blocks.
  //
  // In general, if we encounter a block that isn't dominated by the catch
  // block while we are searching the catch block's successors for a call
  // to the end catch intrinsic, then it is possible that it will be legal for
  // a path through this block to never reach a call to llvm.eh.endcatch.
  // An analogous statement could be made about our search for a landing
  // pad among the catch block's predecessors.
  //
  // What is actually required is that no path is possible at runtime that
  // reaches a call to llvm.eh.begincatch without having previously visited
  // a landingpad instruction and that no path is possible at runtime that
  // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch
  // (mentally adjusting for the fact that in reality these calls will be
  // removed before code generation).
  //
  // Because this is a lint check, we take a pessimistic approach and warn if
  // the control flow is potentially incorrect.

  SmallSet<BasicBlock *, 4> VisitedBlocks;
  BasicBlock *CatchBB = II->getParent();

  // The begin catch must occur in a landing pad block or all paths
  // to it must have come from a landing pad.
  Assert(allPredsCameFromLandingPad(CatchBB, VisitedBlocks),
         "llvm.eh.begincatch may be reachable without passing a landingpad",
         II);

  // Reset the visited block list.
  VisitedBlocks.clear();

  IntrinsicInst *SecondBeginCatch = nullptr;

  // This has to be called before it is asserted.  Otherwise, the first assert
  // below can never be hit.
  bool EndCatchFound = allSuccessorsReachEndCatch(
      CatchBB, std::next(static_cast<BasicBlock::iterator>(II)),
      &SecondBeginCatch, VisitedBlocks);
  Assert(
      SecondBeginCatch == nullptr,
      "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch",
      II, SecondBeginCatch);
  Assert(EndCatchFound,
         "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch",
         II);
}

static bool allPredCameFromBeginCatch(
    BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin,
    IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) {
  VisitedBlocks.insert(BB);
  // Look for a begincatch in this block.
  for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE;
       ++RI) {
    IntrinsicInst *IC = dyn_cast<IntrinsicInst>(&*RI);
    if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch)
      return true;
    // If we find another end catch before we find a begin catch, that's
    // an error.
    if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) {
      *SecondEndCatch = IC;
      return false;
    }
    // If we encounter a landingpad instruction, the search failed.
    if (isa<LandingPadInst>(*RI))
      return false;
  }
  // If while searching we find a block with no predecessors,
  // the search failed.
  if (pred_empty(BB))
    return false;
  // Search any predecessors we haven't seen before.
  for (BasicBlock *Pred : predecessors(BB)) {
    if (VisitedBlocks.count(Pred))
      continue;
    if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch,
                                   VisitedBlocks))
      return false;
  }
  return true;
}

void Lint::visitEHEndCatch(IntrinsicInst *II) {
  // The check in this function makes a potentially dubious assumption about
  // the CFG, namely that any block involved in a catch is only used for the
  // catch.  This will very likely be true of IR generated by a front end,
  // but it may cease to be true, for example, if the IR is run through a
  // pass which combines similar blocks.
  //
  // In general, if we encounter a block that isn't post-dominated by the
  // end catch block while we are searching the end catch block's predecessors
  // for a call to the begin catch intrinsic, then it is possible that it will
  // be legal for a path to reach the end catch block without ever having
  // called llvm.eh.begincatch.
  //
  // What is actually required is that no path is possible at runtime that
  // reaches a call to llvm.eh.endcatch without having previously visited
  // a call to llvm.eh.begincatch (mentally adjusting for the fact that in
  // reality these calls will be removed before code generation).
  //
  // Because this is a lint check, we take a pessimistic approach and warn if
  // the control flow is potentially incorrect.

  BasicBlock *EndCatchBB = II->getParent();

  // All paths to the end catch call must pass through a begin catch call.

  // If llvm.eh.begincatch wasn't called in the current block, look for it
  // recursively in predecessors via allPredCameFromBeginCatch.
  SmallSet<BasicBlock *, 4> VisitedBlocks;
  IntrinsicInst *SecondEndCatch = nullptr;

  // This has to be called before it is asserted.  Otherwise, the first assert
  // below can never be hit.
  bool BeginCatchFound =
      allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II),
                                &SecondEndCatch, VisitedBlocks);
  Assert(
      SecondEndCatch == nullptr,
      "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch",
      II, SecondEndCatch);
  Assert(BeginCatchFound,
         "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch",
         II);
}

static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
                   AssumptionCache *AC) {
  // Assume undef could be zero.
  if (isa<UndefValue>(V))
    return true;

  VectorType *VecTy = dyn_cast<VectorType>(V->getType());
  if (!VecTy) {
    unsigned BitWidth = V->getType()->getIntegerBitWidth();
    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
    computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC,
                     dyn_cast<Instruction>(V), DT);
    return KnownZero.isAllOnesValue();
  }

  // Per-component check doesn't work with zeroinitializer
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    return false;

  if (C->isZeroValue())
    return true;

  // For a vector, KnownZero will only be true if all values are zero, so check
  // this per component
  unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth();
  for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
    Constant *Elem = C->getAggregateElement(I);
    if (isa<UndefValue>(Elem))
      return true;

    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
    computeKnownBits(Elem, KnownZero, KnownOne, DL);
    if (KnownZero.isAllOnesValue())
      return true;
  }

  return false;
}

void Lint::visitSDiv(BinaryOperator &I) {
  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
         "Undefined behavior: Division by zero", &I);
}

void Lint::visitUDiv(BinaryOperator &I) {
  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
         "Undefined behavior: Division by zero", &I);
}

void Lint::visitSRem(BinaryOperator &I) {
  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
         "Undefined behavior: Division by zero", &I);
}

void Lint::visitURem(BinaryOperator &I) {
  Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
         "Undefined behavior: Division by zero", &I);
}
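
// For illustration, the division checks above rely on isZero() and report IR
// such as the following (illustrative) as "Undefined behavior: Division by
// zero":
//
//   %q = sdiv i32 %x, 0
//   %v = udiv <2 x i32> %a, <i32 1, i32 0>   ; any provably-zero element counts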

void Lint::visitAllocaInst(AllocaInst &I) {
  if (isa<ConstantInt>(I.getArraySize()))
    // This isn't undefined behavior, it's just an obvious pessimization.
    Assert(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
           "Pessimization: Static alloca outside of entry block", &I);

  // TODO: Check for an unusual size (MSB set?)
}
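
// Note on the check above: a constant-sized alloca is generally only folded
// into the function's fixed stack frame when it appears in the entry block;
// elsewhere it becomes a dynamic stack adjustment, hence the "pessimization"
// warning.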

void Lint::visitVAArgInst(VAArgInst &I) {
  visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0,
                       nullptr, MemRef::Read | MemRef::Write);
}

void Lint::visitIndirectBrInst(IndirectBrInst &I) {
  visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0,
                       nullptr, MemRef::Branchee);

  Assert(I.getNumDestinations() != 0,
         "Undefined behavior: indirectbr with no destinations", &I);
}

void Lint::visitExtractElementInst(ExtractElementInst &I) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(
          findValue(I.getIndexOperand(), I.getModule()->getDataLayout(),
                    /*OffsetOk=*/false)))
    Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
           "Undefined result: extractelement index out of range", &I);
}

void Lint::visitInsertElementInst(InsertElementInst &I) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(
          findValue(I.getOperand(2), I.getModule()->getDataLayout(),
                    /*OffsetOk=*/false)))
    Assert(CI->getValue().ult(I.getType()->getNumElements()),
           "Undefined result: insertelement index out of range", &I);
}
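
// For illustration, both vector checks above flag a constant index that is not
// less than the vector length, e.g. (illustrative IR):
//
//   %e = extractelement <4 x i32> %v, i32 7
//   %w = insertelement <4 x i32> %v, i32 0, i32 4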

void Lint::visitUnreachableInst(UnreachableInst &I) {
  // This isn't undefined behavior, it's merely suspicious.
  Assert(&I == I.getParent()->begin() ||
             std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(),
         "Unusual: unreachable immediately preceded by instruction without "
         "side effects",
         &I);
}
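
// For example, an unreachable immediately after a plain arithmetic instruction
// is flagged, whereas one following a call (which may have side effects, e.g.
// a call to a noreturn function such as abort) is not.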

/// findValue - Look through bitcasts and simple memory reference patterns
/// to identify an equivalent, but more informative, value.  If OffsetOk
/// is true, look through getelementptrs with non-zero offsets too.
///
/// Most analysis passes don't require this logic, because instcombine
/// will simplify most of these kinds of things away. But it's a goal of
/// this Lint pass to be useful even on non-optimized IR.
Value *Lint::findValue(Value *V, const DataLayout &DL, bool OffsetOk) const {
  SmallPtrSet<Value *, 4> Visited;
  return findValueImpl(V, DL, OffsetOk, Visited);
}

/// findValueImpl - Implementation helper for findValue.
Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
                           SmallPtrSetImpl<Value *> &Visited) const {
  // Detect self-referential values.
  if (!Visited.insert(V).second)
    return UndefValue::get(V->getType());

  // TODO: Look through sext or zext cast, when the result is known to
  // be interpreted as signed or unsigned, respectively.
  // TODO: Look through eliminable cast pairs.
  // TODO: Look through calls with unique return values.
  // TODO: Look through vector insert/extract/shuffle.
  V = OffsetOk ? GetUnderlyingObject(V, DL) : V->stripPointerCasts();
  if (LoadInst *L = dyn_cast<LoadInst>(V)) {
    BasicBlock::iterator BBI = L;
    BasicBlock *BB = L->getParent();
    SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
    for (;;) {
      if (!VisitedBlocks.insert(BB).second)
        break;
      if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
                                              BB, BBI, 6, AA))
        return findValueImpl(U, DL, OffsetOk, Visited);
      if (BBI != BB->begin()) break;
      BB = BB->getUniquePredecessor();
      if (!BB) break;
      BBI = BB->end();
    }
  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
    if (Value *W = PN->hasConstantValue())
      if (W != V)
        return findValueImpl(W, DL, OffsetOk, Visited);
  } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
    if (CI->isNoopCast(DL))
      return findValueImpl(CI->getOperand(0), DL, OffsetOk, Visited);
  } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
    if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
                                     Ex->getIndices()))
      if (W != V)
        return findValueImpl(W, DL, OffsetOk, Visited);
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    // Same as above, but for ConstantExpr instead of Instruction.
    if (Instruction::isCast(CE->getOpcode())) {
      if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
                               CE->getOperand(0)->getType(), CE->getType(),
                               DL.getIntPtrType(V->getType())))
        return findValueImpl(CE->getOperand(0), DL, OffsetOk, Visited);
    } else if (CE->getOpcode() == Instruction::ExtractValue) {
      ArrayRef<unsigned> Indices = CE->getIndices();
      if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
        if (W != V)
          return findValueImpl(W, DL, OffsetOk, Visited);
    }
  }

  // As a last resort, try SimplifyInstruction or constant folding.
  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
    if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC))
      return findValueImpl(W, DL, OffsetOk, Visited);
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI))
      if (W != V)
        return findValueImpl(W, DL, OffsetOk, Visited);
  }

  return V;
}
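
// For illustration, given a pattern such as (illustrative IR):
//
//   store i32 0, i32* %slot
//   %v = load i32, i32* %slot
//
// findValue() would look through the load via FindAvailableLoadedValue above
// and return the stored constant 0 as the more informative equivalent of %v.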

//===----------------------------------------------------------------------===//
//  Implement the public interfaces to this file...
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createLintPass() {
  return new Lint();
}

/// lintFunction - Check a function for errors, printing messages on stderr.
///
void llvm::lintFunction(const Function &f) {
  Function &F = const_cast<Function&>(f);
  assert(!F.isDeclaration() && "Cannot lint external functions");

  legacy::FunctionPassManager FPM(F.getParent());
  Lint *V = new Lint();
  FPM.add(V);
  FPM.run(F);
}

/// lintModule - Check a module for errors, printing messages on stderr.
///
void llvm::lintModule(const Module &M) {
  legacy::PassManager PM;
  Lint *V = new Lint();
  PM.add(V);
  PM.run(const_cast<Module&>(M));
}
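
// A minimal usage sketch (assuming an existing, defined Function F):
//
//   #include "llvm/Analysis/Lint.h"
//   ...
//   llvm::lintFunction(F);   // warnings are written to the debug stream
//
// lintModule() works the same way for an entire module.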