Home | History | Annotate | Download | only in Checkers
      1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This checker defines the attack surface for generic taint propagation.
     11 //
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under-constrained.
     15 //
     16 //===----------------------------------------------------------------------===//
     17 #include "ClangSACheckers.h"
     18 #include "clang/StaticAnalyzer/Core/Checker.h"
     19 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
     20 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
     21 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
     22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
     23 #include "clang/Basic/Builtins.h"
     24 #include <climits>
     25 
     26 using namespace clang;
     27 using namespace ento;
     28 
     29 namespace {
     30 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
     31                                             check::PreStmt<CallExpr> > {
     32 public:
     33   static void *getTag() { static int Tag; return &Tag; }
     34 
     35   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
     36   void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
     37 
     38   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
     39 
     40 private:
     41   static const unsigned InvalidArgIndex = UINT_MAX;
     42   /// Denotes the return vale.
     43   static const unsigned ReturnValueIndex = UINT_MAX - 1;
     44 
     45   mutable OwningPtr<BugType> BT;
     46   inline void initBugType() const {
     47     if (!BT)
     48       BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data"));
     49   }
     50 
     51   /// \brief Catch taint related bugs. Check if tainted data is passed to a
     52   /// system call etc.
     53   bool checkPre(const CallExpr *CE, CheckerContext &C) const;
     54 
     55   /// \brief Add taint sources on a pre-visit.
     56   void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
     57 
     58   /// \brief Propagate taint generated at pre-visit.
     59   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
     60 
     61   /// \brief Add taint sources on a post visit.
     62   void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
     63 
     64   /// Check if the region the expression evaluates to is the standard input,
     65   /// and thus, is tainted.
     66   static bool isStdin(const Expr *E, CheckerContext &C);
     67 
     68   /// \brief Given a pointer argument, get the symbol of the value it contains
     69   /// (points to).
     70   static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
     71 
     72   /// Functions defining the attack surface.
     73   typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
     74                                                        CheckerContext &C) const;
     75   ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
     76   ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
     77   ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
     78 
     79   /// Taint the scanned input if the file is tainted.
     80   ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
     81 
     82   /// Check for CWE-134: Uncontrolled Format String.
     83   static const char MsgUncontrolledFormatString[];
     84   bool checkUncontrolledFormatString(const CallExpr *CE,
     85                                      CheckerContext &C) const;
     86 
     87   /// Check for:
     88   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
     89   /// CWE-78, "Failure to Sanitize Data into an OS Command"
     90   static const char MsgSanitizeSystemArgs[];
     91   bool checkSystemCall(const CallExpr *CE, StringRef Name,
     92                        CheckerContext &C) const;
     93 
     94   /// Check if tainted data is used as a buffer size ins strn.. functions,
     95   /// and allocators.
     96   static const char MsgTaintedBufferSize[];
     97   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
     98                               CheckerContext &C) const;
     99 
    100   /// Generate a report if the expression is tainted or points to tainted data.
    101   bool generateReportIfTainted(const Expr *E, const char Msg[],
    102                                CheckerContext &C) const;
    103 
    104 
    105   typedef llvm::SmallVector<unsigned, 2> ArgVector;
    106 
    107   /// \brief A struct used to specify taint propagation rules for a function.
    108   ///
    109   /// If any of the possible taint source arguments is tainted, all of the
    110   /// destination arguments should also be tainted. Use InvalidArgIndex in the
    111   /// src list to specify that all of the arguments can introduce taint. Use
    112   /// InvalidArgIndex in the dst arguments to signify that all the non-const
    113   /// pointer and reference arguments might be tainted on return. If
    114   /// ReturnValueIndex is added to the dst list, the return value will be
    115   /// tainted.
    116   struct TaintPropagationRule {
    117     /// List of arguments which can be taint sources and should be checked.
    118     ArgVector SrcArgs;
    119     /// List of arguments which should be tainted on function return.
    120     ArgVector DstArgs;
    121     // TODO: Check if using other data structures would be more optimal.
    122 
    123     TaintPropagationRule() {}
    124 
    125     TaintPropagationRule(unsigned SArg,
    126                          unsigned DArg, bool TaintRet = false) {
    127       SrcArgs.push_back(SArg);
    128       DstArgs.push_back(DArg);
    129       if (TaintRet)
    130         DstArgs.push_back(ReturnValueIndex);
    131     }
    132 
    133     TaintPropagationRule(unsigned SArg1, unsigned SArg2,
    134                          unsigned DArg, bool TaintRet = false) {
    135       SrcArgs.push_back(SArg1);
    136       SrcArgs.push_back(SArg2);
    137       DstArgs.push_back(DArg);
    138       if (TaintRet)
    139         DstArgs.push_back(ReturnValueIndex);
    140     }
    141 
    142     /// Get the propagation rule for a given function.
    143     static TaintPropagationRule
    144       getTaintPropagationRule(const FunctionDecl *FDecl,
    145                               StringRef Name,
    146                               CheckerContext &C);
    147 
    148     inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
    149     inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
    150 
    151     inline bool isNull() const { return SrcArgs.empty(); }
    152 
    153     inline bool isDestinationArgument(unsigned ArgNum) const {
    154       return (std::find(DstArgs.begin(),
    155                         DstArgs.end(), ArgNum) != DstArgs.end());
    156     }
    157 
    158     static inline bool isTaintedOrPointsToTainted(const Expr *E,
    159                                                   ProgramStateRef State,
    160                                                   CheckerContext &C) {
    161       return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
    162               (E->getType().getTypePtr()->isPointerType() &&
    163                State->isTainted(getPointedToSymbol(C, E))));
    164     }
    165 
    166     /// \brief Pre-process a function which propagates taint according to the
    167     /// taint rule.
    168     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
    169 
    170   };
    171 };
    172 
    173 const unsigned GenericTaintChecker::ReturnValueIndex;
    174 const unsigned GenericTaintChecker::InvalidArgIndex;
    175 
    176 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
    177   "Untrusted data is used as a format string "
    178   "(CWE-134: Uncontrolled Format String)";
    179 
    180 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
    181   "Untrusted data is passed to a system call "
    182   "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
    183 
    184 const char GenericTaintChecker::MsgTaintedBufferSize[] =
    185   "Untrusted data is used to specify the buffer size "
    186   "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
    187   "character data and the null terminator)";
    188 
    189 } // end of anonymous namespace
    190 
    191 /// A set which is used to pass information from call pre-visit instruction
    192 /// to the call post-visit. The values are unsigned integers, which are either
    193 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
    194 /// points to data, which should be tainted on return.
    195 namespace { struct TaintArgsOnPostVisit{}; }
    196 namespace clang { namespace ento {
    197 template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
    198     :  public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
    199   static void *GDMIndex() { return GenericTaintChecker::getTag(); }
    200 };
    201 }}
    202 
    203 GenericTaintChecker::TaintPropagationRule
    204 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
    205                                                      const FunctionDecl *FDecl,
    206                                                      StringRef Name,
    207                                                      CheckerContext &C) {
    208   // TODO: Currently, we might loose precision here: we always mark a return
    209   // value as tainted even if it's just a pointer, pointing to tainted data.
    210 
    211   // Check for exact name match for functions without builtin substitutes.
    212   TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
    213     .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
    214     .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
    215     .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
    216     .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
    217     .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
    218     .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
    219     .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
    220     .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
    221     .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
    222     .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
    223     .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
    224     .Case("read", TaintPropagationRule(0, 2, 1, true))
    225     .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
    226     .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
    227     .Case("fgets", TaintPropagationRule(2, 0, true))
    228     .Case("getline", TaintPropagationRule(2, 0))
    229     .Case("getdelim", TaintPropagationRule(3, 0))
    230     .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
    231     .Default(TaintPropagationRule());
    232 
    233   if (!Rule.isNull())
    234     return Rule;
    235 
    236   // Check if it's one of the memory setting/copying functions.
    237   // This check is specialized but faster then calling isCLibraryFunction.
    238   unsigned BId = 0;
    239   if ( (BId = FDecl->getMemoryFunctionKind()) )
    240     switch(BId) {
    241     case Builtin::BImemcpy:
    242     case Builtin::BImemmove:
    243     case Builtin::BIstrncpy:
    244     case Builtin::BIstrncat:
    245       return TaintPropagationRule(1, 2, 0, true);
    246     case Builtin::BIstrlcpy:
    247     case Builtin::BIstrlcat:
    248       return TaintPropagationRule(1, 2, 0, false);
    249     case Builtin::BIstrndup:
    250       return TaintPropagationRule(0, 1, ReturnValueIndex);
    251 
    252     default:
    253       break;
    254     };
    255 
    256   // Process all other functions which could be defined as builtins.
    257   if (Rule.isNull()) {
    258     if (C.isCLibraryFunction(FDecl, "snprintf") ||
    259         C.isCLibraryFunction(FDecl, "sprintf"))
    260       return TaintPropagationRule(InvalidArgIndex, 0, true);
    261     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
    262              C.isCLibraryFunction(FDecl, "stpcpy") ||
    263              C.isCLibraryFunction(FDecl, "strcat"))
    264       return TaintPropagationRule(1, 0, true);
    265     else if (C.isCLibraryFunction(FDecl, "bcopy"))
    266       return TaintPropagationRule(0, 2, 1, false);
    267     else if (C.isCLibraryFunction(FDecl, "strdup") ||
    268              C.isCLibraryFunction(FDecl, "strdupa"))
    269       return TaintPropagationRule(0, ReturnValueIndex);
    270     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
    271       return TaintPropagationRule(0, ReturnValueIndex);
    272   }
    273 
    274   // Skipping the following functions, since they might be used for cleansing
    275   // or smart memory copy:
    276   // - memccpy - copying until hitting a special character.
    277 
    278   return TaintPropagationRule();
    279 }
    280 
    281 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
    282                                        CheckerContext &C) const {
    283   // Check for errors first.
    284   if (checkPre(CE, C))
    285     return;
    286 
    287   // Add taint second.
    288   addSourcesPre(CE, C);
    289 }
    290 
    291 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
    292                                         CheckerContext &C) const {
    293   if (propagateFromPre(CE, C))
    294     return;
    295   addSourcesPost(CE, C);
    296 }
    297 
    298 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
    299                                         CheckerContext &C) const {
    300   ProgramStateRef State = 0;
    301   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
    302   if (!FDecl || FDecl->getKind() != Decl::Function)
    303     return;
    304 
    305   StringRef Name = C.getCalleeName(FDecl);
    306   if (Name.empty())
    307     return;
    308 
    309   // First, try generating a propagation rule for this function.
    310   TaintPropagationRule Rule =
    311     TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
    312   if (!Rule.isNull()) {
    313     State = Rule.process(CE, C);
    314     if (!State)
    315       return;
    316     C.addTransition(State);
    317     return;
    318   }
    319 
    320   // Otherwise, check if we have custom pre-processing implemented.
    321   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
    322     .Case("fscanf", &GenericTaintChecker::preFscanf)
    323     .Default(0);
    324   // Check and evaluate the call.
    325   if (evalFunction)
    326     State = (this->*evalFunction)(CE, C);
    327   if (!State)
    328     return;
    329   C.addTransition(State);
    330 
    331 }
    332 
    333 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
    334                                            CheckerContext &C) const {
    335   ProgramStateRef State = C.getState();
    336 
    337   // Depending on what was tainted at pre-visit, we determined a set of
    338   // arguments which should be tainted after the function returns. These are
    339   // stored in the state as TaintArgsOnPostVisit set.
    340   llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
    341   if (TaintArgs.isEmpty())
    342     return false;
    343 
    344   for (llvm::ImmutableSet<unsigned>::iterator
    345          I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
    346     unsigned ArgNum  = *I;
    347 
    348     // Special handling for the tainted return value.
    349     if (ArgNum == ReturnValueIndex) {
    350       State = State->addTaint(CE, C.getLocationContext());
    351       continue;
    352     }
    353 
    354     // The arguments are pointer arguments. The data they are pointing at is
    355     // tainted after the call.
    356     if (CE->getNumArgs() < (ArgNum + 1))
    357       return false;
    358     const Expr* Arg = CE->getArg(ArgNum);
    359     SymbolRef Sym = getPointedToSymbol(C, Arg);
    360     if (Sym)
    361       State = State->addTaint(Sym);
    362   }
    363 
    364   // Clear up the taint info from the state.
    365   State = State->remove<TaintArgsOnPostVisit>();
    366 
    367   if (State != C.getState()) {
    368     C.addTransition(State);
    369     return true;
    370   }
    371   return false;
    372 }
    373 
    374 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
    375                                          CheckerContext &C) const {
    376   // Define the attack surface.
    377   // Set the evaluation function by switching on the callee name.
    378   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
    379   if (!FDecl || FDecl->getKind() != Decl::Function)
    380     return;
    381 
    382   StringRef Name = C.getCalleeName(FDecl);
    383   if (Name.empty())
    384     return;
    385   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
    386     .Case("scanf", &GenericTaintChecker::postScanf)
    387     // TODO: Add support for vfscanf & family.
    388     .Case("getchar", &GenericTaintChecker::postRetTaint)
    389     .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
    390     .Case("getenv", &GenericTaintChecker::postRetTaint)
    391     .Case("fopen", &GenericTaintChecker::postRetTaint)
    392     .Case("fdopen", &GenericTaintChecker::postRetTaint)
    393     .Case("freopen", &GenericTaintChecker::postRetTaint)
    394     .Case("getch", &GenericTaintChecker::postRetTaint)
    395     .Case("wgetch", &GenericTaintChecker::postRetTaint)
    396     .Case("socket", &GenericTaintChecker::postSocket)
    397     .Default(0);
    398 
    399   // If the callee isn't defined, it is not of security concern.
    400   // Check and evaluate the call.
    401   ProgramStateRef State = 0;
    402   if (evalFunction)
    403     State = (this->*evalFunction)(CE, C);
    404   if (!State)
    405     return;
    406 
    407   C.addTransition(State);
    408 }
    409 
    410 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
    411 
    412   if (checkUncontrolledFormatString(CE, C))
    413     return true;
    414 
    415   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
    416   if (!FDecl || FDecl->getKind() != Decl::Function)
    417     return false;
    418 
    419   StringRef Name = C.getCalleeName(FDecl);
    420   if (Name.empty())
    421     return false;
    422 
    423   if (checkSystemCall(CE, Name, C))
    424     return true;
    425 
    426   if (checkTaintedBufferSize(CE, FDecl, C))
    427     return true;
    428 
    429   return false;
    430 }
    431 
    432 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
    433                                                   const Expr* Arg) {
    434   ProgramStateRef State = C.getState();
    435   SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
    436   if (AddrVal.isUnknownOrUndef())
    437     return 0;
    438 
    439   Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
    440   if (!AddrLoc)
    441     return 0;
    442 
    443   const PointerType *ArgTy =
    444     dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
    445   SVal Val = State->getSVal(*AddrLoc,
    446                             ArgTy ? ArgTy->getPointeeType(): QualType());
    447   return Val.getAsSymbol();
    448 }
    449 
    450 ProgramStateRef
    451 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
    452                                                    CheckerContext &C) const {
    453   ProgramStateRef State = C.getState();
    454 
    455   // Check for taint in arguments.
    456   bool IsTainted = false;
    457   for (ArgVector::const_iterator I = SrcArgs.begin(),
    458                                  E = SrcArgs.end(); I != E; ++I) {
    459     unsigned ArgNum = *I;
    460 
    461     if (ArgNum == InvalidArgIndex) {
    462       // Check if any of the arguments is tainted, but skip the
    463       // destination arguments.
    464       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
    465         if (isDestinationArgument(i))
    466           continue;
    467         if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
    468           break;
    469       }
    470       break;
    471     }
    472 
    473     if (CE->getNumArgs() < (ArgNum + 1))
    474       return State;
    475     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
    476       break;
    477   }
    478   if (!IsTainted)
    479     return State;
    480 
    481   // Mark the arguments which should be tainted after the function returns.
    482   for (ArgVector::const_iterator I = DstArgs.begin(),
    483                                  E = DstArgs.end(); I != E; ++I) {
    484     unsigned ArgNum = *I;
    485 
    486     // Should we mark all arguments as tainted?
    487     if (ArgNum == InvalidArgIndex) {
    488       // For all pointer and references that were passed in:
    489       //   If they are not pointing to const data, mark data as tainted.
    490       //   TODO: So far we are just going one level down; ideally we'd need to
    491       //         recurse here.
    492       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
    493         const Expr *Arg = CE->getArg(i);
    494         // Process pointer argument.
    495         const Type *ArgTy = Arg->getType().getTypePtr();
    496         QualType PType = ArgTy->getPointeeType();
    497         if ((!PType.isNull() && !PType.isConstQualified())
    498             || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
    499           State = State->add<TaintArgsOnPostVisit>(i);
    500       }
    501       continue;
    502     }
    503 
    504     // Should mark the return value?
    505     if (ArgNum == ReturnValueIndex) {
    506       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
    507       continue;
    508     }
    509 
    510     // Mark the given argument.
    511     assert(ArgNum < CE->getNumArgs());
    512     State = State->add<TaintArgsOnPostVisit>(ArgNum);
    513   }
    514 
    515   return State;
    516 }
    517 
    518 
    519 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
    520 // and arg 1 should get taint.
    521 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
    522                                                    CheckerContext &C) const {
    523   assert(CE->getNumArgs() >= 2);
    524   ProgramStateRef State = C.getState();
    525 
    526   // Check is the file descriptor is tainted.
    527   if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
    528       isStdin(CE->getArg(0), C)) {
    529     // All arguments except for the first two should get taint.
    530     for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
    531         State = State->add<TaintArgsOnPostVisit>(i);
    532     return State;
    533   }
    534 
    535   return 0;
    536 }
    537 
    538 
    539 // If argument 0(protocol domain) is network, the return value should get taint.
    540 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
    541                                                 CheckerContext &C) const {
    542   ProgramStateRef State = C.getState();
    543   if (CE->getNumArgs() < 3)
    544     return State;
    545 
    546   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
    547   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
    548   // White list the internal communication protocols.
    549   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
    550       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
    551     return State;
    552   State = State->addTaint(CE, C.getLocationContext());
    553   return State;
    554 }
    555 
    556 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
    557                                                    CheckerContext &C) const {
    558   ProgramStateRef State = C.getState();
    559   if (CE->getNumArgs() < 2)
    560     return State;
    561 
    562   // All arguments except for the very first one should get taint.
    563   for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
    564     // The arguments are pointer arguments. The data they are pointing at is
    565     // tainted after the call.
    566     const Expr* Arg = CE->getArg(i);
    567         SymbolRef Sym = getPointedToSymbol(C, Arg);
    568     if (Sym)
    569       State = State->addTaint(Sym);
    570   }
    571   return State;
    572 }
    573 
    574 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
    575                                                   CheckerContext &C) const {
    576   return C.getState()->addTaint(CE, C.getLocationContext());
    577 }
    578 
    579 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
    580   ProgramStateRef State = C.getState();
    581   SVal Val = State->getSVal(E, C.getLocationContext());
    582 
    583   // stdin is a pointer, so it would be a region.
    584   const MemRegion *MemReg = Val.getAsRegion();
    585 
    586   // The region should be symbolic, we do not know it's value.
    587   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
    588   if (!SymReg)
    589     return false;
    590 
    591   // Get it's symbol and find the declaration region it's pointing to.
    592   const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
    593   if (!Sm)
    594     return false;
    595   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
    596   if (!DeclReg)
    597     return false;
    598 
    599   // This region corresponds to a declaration, find out if it's a global/extern
    600   // variable named stdin with the proper type.
    601   if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
    602     D = D->getCanonicalDecl();
    603     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
    604         if (const PointerType * PtrTy =
    605               dyn_cast<PointerType>(D->getType().getTypePtr()))
    606           if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
    607             return true;
    608   }
    609   return false;
    610 }
    611 
    612 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
    613                                        const CheckerContext &C,
    614                                        unsigned int &ArgNum) {
    615   // Find if the function contains a format string argument.
    616   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
    617   // vsnprintf, syslog, custom annotated functions.
    618   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
    619   if (!FDecl)
    620     return false;
    621   for (specific_attr_iterator<FormatAttr>
    622          i = FDecl->specific_attr_begin<FormatAttr>(),
    623          e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
    624 
    625     const FormatAttr *Format = *i;
    626     ArgNum = Format->getFormatIdx() - 1;
    627     if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
    628       return true;
    629   }
    630 
    631   // Or if a function is named setproctitle (this is a heuristic).
    632   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
    633     ArgNum = 0;
    634     return true;
    635   }
    636 
    637   return false;
    638 }
    639 
    640 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
    641                                                   const char Msg[],
    642                                                   CheckerContext &C) const {
    643   assert(E);
    644 
    645   // Check for taint.
    646   ProgramStateRef State = C.getState();
    647   if (!State->isTainted(getPointedToSymbol(C, E)) &&
    648       !State->isTainted(E, C.getLocationContext()))
    649     return false;
    650 
    651   // Generate diagnostic.
    652   if (ExplodedNode *N = C.addTransition()) {
    653     initBugType();
    654     BugReport *report = new BugReport(*BT, Msg, N);
    655     report->addRange(E->getSourceRange());
    656     C.EmitReport(report);
    657     return true;
    658   }
    659   return false;
    660 }
    661 
    662 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
    663                                                         CheckerContext &C) const{
    664   // Check if the function contains a format string argument.
    665   unsigned int ArgNum = 0;
    666   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
    667     return false;
    668 
    669   // If either the format string content or the pointer itself are tainted, warn.
    670   if (generateReportIfTainted(CE->getArg(ArgNum),
    671                               MsgUncontrolledFormatString, C))
    672     return true;
    673   return false;
    674 }
    675 
    676 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
    677                                           StringRef Name,
    678                                           CheckerContext &C) const {
    679   // TODO: It might make sense to run this check on demand. In some cases,
    680   // we should check if the environment has been cleansed here. We also might
    681   // need to know if the user was reset before these calls(seteuid).
    682   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
    683     .Case("system", 0)
    684     .Case("popen", 0)
    685     .Case("execl", 0)
    686     .Case("execle", 0)
    687     .Case("execlp", 0)
    688     .Case("execv", 0)
    689     .Case("execvp", 0)
    690     .Case("execvP", 0)
    691     .Case("execve", 0)
    692     .Case("dlopen", 0)
    693     .Default(UINT_MAX);
    694 
    695   if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
    696     return false;
    697 
    698   if (generateReportIfTainted(CE->getArg(ArgNum),
    699                               MsgSanitizeSystemArgs, C))
    700     return true;
    701 
    702   return false;
    703 }
    704 
    705 // TODO: Should this check be a part of the CString checker?
    706 // If yes, should taint be a global setting?
    707 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
    708                                                  const FunctionDecl *FDecl,
    709                                                  CheckerContext &C) const {
    710   // If the function has a buffer size argument, set ArgNum.
    711   unsigned ArgNum = InvalidArgIndex;
    712   unsigned BId = 0;
    713   if ( (BId = FDecl->getMemoryFunctionKind()) )
    714     switch(BId) {
    715     case Builtin::BImemcpy:
    716     case Builtin::BImemmove:
    717     case Builtin::BIstrncpy:
    718       ArgNum = 2;
    719       break;
    720     case Builtin::BIstrndup:
    721       ArgNum = 1;
    722       break;
    723     default:
    724       break;
    725     };
    726 
    727   if (ArgNum == InvalidArgIndex) {
    728     if (C.isCLibraryFunction(FDecl, "malloc") ||
    729         C.isCLibraryFunction(FDecl, "calloc") ||
    730         C.isCLibraryFunction(FDecl, "alloca"))
    731       ArgNum = 0;
    732     else if (C.isCLibraryFunction(FDecl, "memccpy"))
    733       ArgNum = 3;
    734     else if (C.isCLibraryFunction(FDecl, "realloc"))
    735       ArgNum = 1;
    736     else if (C.isCLibraryFunction(FDecl, "bcopy"))
    737       ArgNum = 2;
    738   }
    739 
    740   if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
    741       generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
    742     return true;
    743 
    744   return false;
    745 }
    746 
    747 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
    748   mgr.registerChecker<GenericTaintChecker>();
    749 }
    750