Home | History | Annotate | Download | only in Checkers
      1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This checker defines the attack surface for generic taint propagation.
     11 //
     12 // The taint information produced by it might be useful to other checkers. For
     13 // example, checkers should report errors which involve tainted data more
     14 // aggressively, even if the involved symbols are under constrained.
     15 //
     16 //===----------------------------------------------------------------------===//
     17 #include "ClangSACheckers.h"
     18 #include "clang/StaticAnalyzer/Core/Checker.h"
     19 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
     20 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
     21 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
     22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
     23 #include "clang/Basic/Builtins.h"
     24 #include <climits>
     25 
     26 using namespace clang;
     27 using namespace ento;
     28 
     29 namespace {
     30 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
     31                                             check::PreStmt<CallExpr> > {
     32 public:
     33   static void *getTag() { static int Tag; return &Tag; }
     34 
     35   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
     36   void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
     37 
     38   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
     39 
     40 private:
     41   static const unsigned InvalidArgIndex = UINT_MAX;
     42   /// Denotes the return vale.
     43   static const unsigned ReturnValueIndex = UINT_MAX - 1;
     44 
     45   mutable OwningPtr<BugType> BT;
     46   inline void initBugType() const {
     47     if (!BT)
     48       BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data"));
     49   }
     50 
     51   /// \brief Catch taint related bugs. Check if tainted data is passed to a
     52   /// system call etc.
     53   bool checkPre(const CallExpr *CE, CheckerContext &C) const;
     54 
     55   /// \brief Add taint sources on a pre-visit.
     56   void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
     57 
     58   /// \brief Propagate taint generated at pre-visit.
     59   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
     60 
     61   /// \brief Add taint sources on a post visit.
     62   void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
     63 
     64   /// Check if the region the expression evaluates to is the standard input,
     65   /// and thus, is tainted.
     66   static bool isStdin(const Expr *E, CheckerContext &C);
     67 
     68   /// \brief Given a pointer argument, get the symbol of the value it contains
     69   /// (points to).
     70   static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
     71 
     72   /// Functions defining the attack surface.
     73   typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
     74                                                        CheckerContext &C) const;
     75   ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
     76   ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
     77   ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
     78 
     79   /// Taint the scanned input if the file is tainted.
     80   ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
     81 
     82   /// Check for CWE-134: Uncontrolled Format String.
     83   static const char MsgUncontrolledFormatString[];
     84   bool checkUncontrolledFormatString(const CallExpr *CE,
     85                                      CheckerContext &C) const;
     86 
     87   /// Check for:
     88   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
     89   /// CWE-78, "Failure to Sanitize Data into an OS Command"
     90   static const char MsgSanitizeSystemArgs[];
     91   bool checkSystemCall(const CallExpr *CE, StringRef Name,
     92                        CheckerContext &C) const;
     93 
     94   /// Check if tainted data is used as a buffer size ins strn.. functions,
     95   /// and allocators.
     96   static const char MsgTaintedBufferSize[];
     97   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
     98                               CheckerContext &C) const;
     99 
    100   /// Generate a report if the expression is tainted or points to tainted data.
    101   bool generateReportIfTainted(const Expr *E, const char Msg[],
    102                                CheckerContext &C) const;
    103 
    104 
    105   typedef llvm::SmallVector<unsigned, 2> ArgVector;
    106 
    107   /// \brief A struct used to specify taint propagation rules for a function.
    108   ///
    109   /// If any of the possible taint source arguments is tainted, all of the
    110   /// destination arguments should also be tainted. Use InvalidArgIndex in the
    111   /// src list to specify that all of the arguments can introduce taint. Use
    112   /// InvalidArgIndex in the dst arguments to signify that all the non-const
    113   /// pointer and reference arguments might be tainted on return. If
    114   /// ReturnValueIndex is added to the dst list, the return value will be
    115   /// tainted.
    116   struct TaintPropagationRule {
    117     /// List of arguments which can be taint sources and should be checked.
    118     ArgVector SrcArgs;
    119     /// List of arguments which should be tainted on function return.
    120     ArgVector DstArgs;
    121     // TODO: Check if using other data structures would be more optimal.
    122 
    123     TaintPropagationRule() {}
    124 
    125     TaintPropagationRule(unsigned SArg,
    126                          unsigned DArg, bool TaintRet = false) {
    127       SrcArgs.push_back(SArg);
    128       DstArgs.push_back(DArg);
    129       if (TaintRet)
    130         DstArgs.push_back(ReturnValueIndex);
    131     }
    132 
    133     TaintPropagationRule(unsigned SArg1, unsigned SArg2,
    134                          unsigned DArg, bool TaintRet = false) {
    135       SrcArgs.push_back(SArg1);
    136       SrcArgs.push_back(SArg2);
    137       DstArgs.push_back(DArg);
    138       if (TaintRet)
    139         DstArgs.push_back(ReturnValueIndex);
    140     }
    141 
    142     /// Get the propagation rule for a given function.
    143     static TaintPropagationRule
    144       getTaintPropagationRule(const FunctionDecl *FDecl,
    145                               StringRef Name,
    146                               CheckerContext &C);
    147 
    148     inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
    149     inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
    150 
    151     inline bool isNull() const { return SrcArgs.empty(); }
    152 
    153     inline bool isDestinationArgument(unsigned ArgNum) const {
    154       return (std::find(DstArgs.begin(),
    155                         DstArgs.end(), ArgNum) != DstArgs.end());
    156     }
    157 
    158     static inline bool isTaintedOrPointsToTainted(const Expr *E,
    159                                                   ProgramStateRef State,
    160                                                   CheckerContext &C) {
    161       return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
    162               (E->getType().getTypePtr()->isPointerType() &&
    163                State->isTainted(getPointedToSymbol(C, E))));
    164     }
    165 
    166     /// \brief Pre-process a function which propagates taint according to the
    167     /// taint rule.
    168     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
    169 
    170   };
    171 };
    172 
    173 const unsigned GenericTaintChecker::ReturnValueIndex;
    174 const unsigned GenericTaintChecker::InvalidArgIndex;
    175 
    176 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
    177   "Untrusted data is used as a format string "
    178   "(CWE-134: Uncontrolled Format String)";
    179 
    180 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
    181   "Untrusted data is passed to a system call "
    182   "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
    183 
    184 const char GenericTaintChecker::MsgTaintedBufferSize[] =
    185   "Untrusted data is used to specify the buffer size "
    186   "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
    187   "character data and the null terminator)";
    188 
    189 } // end of anonymous namespace
    190 
    191 /// A set which is used to pass information from call pre-visit instruction
    192 /// to the call post-visit. The values are unsigned integers, which are either
    193 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
    194 /// points to data, which should be tainted on return.
    195 namespace { struct TaintArgsOnPostVisit{}; }
    196 namespace clang { namespace ento {
    197 template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
    198     :  public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
    199   static void *GDMIndex() { return GenericTaintChecker::getTag(); }
    200 };
    201 }}
    202 
    203 GenericTaintChecker::TaintPropagationRule
    204 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
    205                                                      const FunctionDecl *FDecl,
    206                                                      StringRef Name,
    207                                                      CheckerContext &C) {
    208   // TODO: Currently, we might loose precision here: we always mark a return
    209   // value as tainted even if it's just a pointer, pointing to tainted data.
    210 
    211   // Check for exact name match for functions without builtin substitutes.
    212   TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
    213     .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
    214     .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
    215     .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
    216     .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
    217     .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
    218     .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
    219     .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
    220     .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
    221     .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
    222     .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
    223     .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
    224     .Case("read", TaintPropagationRule(0, 2, 1, true))
    225     .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
    226     .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
    227     .Case("fgets", TaintPropagationRule(2, 0, true))
    228     .Case("getline", TaintPropagationRule(2, 0))
    229     .Case("getdelim", TaintPropagationRule(3, 0))
    230     .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
    231     .Default(TaintPropagationRule());
    232 
    233   if (!Rule.isNull())
    234     return Rule;
    235 
    236   // Check if it's one of the memory setting/copying functions.
    237   // This check is specialized but faster then calling isCLibraryFunction.
    238   unsigned BId = 0;
    239   if ( (BId = FDecl->getMemoryFunctionKind()) )
    240     switch(BId) {
    241     case Builtin::BImemcpy:
    242     case Builtin::BImemmove:
    243     case Builtin::BIstrncpy:
    244     case Builtin::BIstrncat:
    245       return TaintPropagationRule(1, 2, 0, true);
    246     case Builtin::BIstrlcpy:
    247     case Builtin::BIstrlcat:
    248       return TaintPropagationRule(1, 2, 0, false);
    249     case Builtin::BIstrndup:
    250       return TaintPropagationRule(0, 1, ReturnValueIndex);
    251 
    252     default:
    253       break;
    254     };
    255 
    256   // Process all other functions which could be defined as builtins.
    257   if (Rule.isNull()) {
    258     if (C.isCLibraryFunction(FDecl, "snprintf") ||
    259         C.isCLibraryFunction(FDecl, "sprintf"))
    260       return TaintPropagationRule(InvalidArgIndex, 0, true);
    261     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
    262              C.isCLibraryFunction(FDecl, "stpcpy") ||
    263              C.isCLibraryFunction(FDecl, "strcat"))
    264       return TaintPropagationRule(1, 0, true);
    265     else if (C.isCLibraryFunction(FDecl, "bcopy"))
    266       return TaintPropagationRule(0, 2, 1, false);
    267     else if (C.isCLibraryFunction(FDecl, "strdup") ||
    268              C.isCLibraryFunction(FDecl, "strdupa"))
    269       return TaintPropagationRule(0, ReturnValueIndex);
    270     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
    271       return TaintPropagationRule(0, ReturnValueIndex);
    272   }
    273 
    274   // Skipping the following functions, since they might be used for cleansing
    275   // or smart memory copy:
    276   // - memccpy - copying untill hitting a special character.
    277 
    278   return TaintPropagationRule();
    279 }
    280 
    281 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
    282                                        CheckerContext &C) const {
    283   // Check for errors first.
    284   if (checkPre(CE, C))
    285     return;
    286 
    287   // Add taint second.
    288   addSourcesPre(CE, C);
    289 }
    290 
    291 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
    292                                         CheckerContext &C) const {
    293   if (propagateFromPre(CE, C))
    294     return;
    295   addSourcesPost(CE, C);
    296 }
    297 
    298 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
    299                                         CheckerContext &C) const {
    300   ProgramStateRef State = 0;
    301   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
    302   StringRef Name = C.getCalleeName(FDecl);
    303   if (Name.empty())
    304     return;
    305 
    306   // First, try generating a propagation rule for this function.
    307   TaintPropagationRule Rule =
    308     TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
    309   if (!Rule.isNull()) {
    310     State = Rule.process(CE, C);
    311     if (!State)
    312       return;
    313     C.addTransition(State);
    314     return;
    315   }
    316 
    317   // Otherwise, check if we have custom pre-processing implemented.
    318   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
    319     .Case("fscanf", &GenericTaintChecker::preFscanf)
    320     .Default(0);
    321   // Check and evaluate the call.
    322   if (evalFunction)
    323     State = (this->*evalFunction)(CE, C);
    324   if (!State)
    325     return;
    326   C.addTransition(State);
    327 
    328 }
    329 
    330 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
    331                                            CheckerContext &C) const {
    332   ProgramStateRef State = C.getState();
    333 
    334   // Depending on what was tainted at pre-visit, we determined a set of
    335   // arguments which should be tainted after the function returns. These are
    336   // stored in the state as TaintArgsOnPostVisit set.
    337   llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
    338   if (TaintArgs.isEmpty())
    339     return false;
    340 
    341   for (llvm::ImmutableSet<unsigned>::iterator
    342          I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
    343     unsigned ArgNum  = *I;
    344 
    345     // Special handling for the tainted return value.
    346     if (ArgNum == ReturnValueIndex) {
    347       State = State->addTaint(CE, C.getLocationContext());
    348       continue;
    349     }
    350 
    351     // The arguments are pointer arguments. The data they are pointing at is
    352     // tainted after the call.
    353     if (CE->getNumArgs() < (ArgNum + 1))
    354       return false;
    355     const Expr* Arg = CE->getArg(ArgNum);
    356     SymbolRef Sym = getPointedToSymbol(C, Arg);
    357     if (Sym)
    358       State = State->addTaint(Sym);
    359   }
    360 
    361   // Clear up the taint info from the state.
    362   State = State->remove<TaintArgsOnPostVisit>();
    363 
    364   if (State != C.getState()) {
    365     C.addTransition(State);
    366     return true;
    367   }
    368   return false;
    369 }
    370 
    371 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
    372                                          CheckerContext &C) const {
    373   // Define the attack surface.
    374   // Set the evaluation function by switching on the callee name.
    375   StringRef Name = C.getCalleeName(CE);
    376   if (Name.empty())
    377     return;
    378   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
    379     .Case("scanf", &GenericTaintChecker::postScanf)
    380     // TODO: Add support for vfscanf & family.
    381     .Case("getchar", &GenericTaintChecker::postRetTaint)
    382     .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
    383     .Case("getenv", &GenericTaintChecker::postRetTaint)
    384     .Case("fopen", &GenericTaintChecker::postRetTaint)
    385     .Case("fdopen", &GenericTaintChecker::postRetTaint)
    386     .Case("freopen", &GenericTaintChecker::postRetTaint)
    387     .Case("getch", &GenericTaintChecker::postRetTaint)
    388     .Case("wgetch", &GenericTaintChecker::postRetTaint)
    389     .Case("socket", &GenericTaintChecker::postSocket)
    390     .Default(0);
    391 
    392   // If the callee isn't defined, it is not of security concern.
    393   // Check and evaluate the call.
    394   ProgramStateRef State = 0;
    395   if (evalFunction)
    396     State = (this->*evalFunction)(CE, C);
    397   if (!State)
    398     return;
    399 
    400   C.addTransition(State);
    401 }
    402 
    403 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
    404 
    405   if (checkUncontrolledFormatString(CE, C))
    406     return true;
    407 
    408   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
    409   StringRef Name = C.getCalleeName(FDecl);
    410   if (Name.empty())
    411     return false;
    412 
    413   if (checkSystemCall(CE, Name, C))
    414     return true;
    415 
    416   if (checkTaintedBufferSize(CE, FDecl, C))
    417     return true;
    418 
    419   return false;
    420 }
    421 
    422 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
    423                                                   const Expr* Arg) {
    424   ProgramStateRef State = C.getState();
    425   SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
    426   if (AddrVal.isUnknownOrUndef())
    427     return 0;
    428 
    429   Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
    430   if (!AddrLoc)
    431     return 0;
    432 
    433   const PointerType *ArgTy =
    434     dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
    435   SVal Val = State->getSVal(*AddrLoc,
    436                             ArgTy ? ArgTy->getPointeeType(): QualType());
    437   return Val.getAsSymbol();
    438 }
    439 
    440 ProgramStateRef
    441 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
    442                                                    CheckerContext &C) const {
    443   ProgramStateRef State = C.getState();
    444 
    445   // Check for taint in arguments.
    446   bool IsTainted = false;
    447   for (ArgVector::const_iterator I = SrcArgs.begin(),
    448                                  E = SrcArgs.end(); I != E; ++I) {
    449     unsigned ArgNum = *I;
    450 
    451     if (ArgNum == InvalidArgIndex) {
    452       // Check if any of the arguments is tainted, but skip the
    453       // destination arguments.
    454       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
    455         if (isDestinationArgument(i))
    456           continue;
    457         if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
    458           break;
    459       }
    460       break;
    461     }
    462 
    463     if (CE->getNumArgs() < (ArgNum + 1))
    464       return State;
    465     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
    466       break;
    467   }
    468   if (!IsTainted)
    469     return State;
    470 
    471   // Mark the arguments which should be tainted after the function returns.
    472   for (ArgVector::const_iterator I = DstArgs.begin(),
    473                                  E = DstArgs.end(); I != E; ++I) {
    474     unsigned ArgNum = *I;
    475 
    476     // Should we mark all arguments as tainted?
    477     if (ArgNum == InvalidArgIndex) {
    478       // For all pointer and references that were passed in:
    479       //   If they are not pointing to const data, mark data as tainted.
    480       //   TODO: So far we are just going one level down; ideally we'd need to
    481       //         recurse here.
    482       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
    483         const Expr *Arg = CE->getArg(i);
    484         // Process pointer argument.
    485         const Type *ArgTy = Arg->getType().getTypePtr();
    486         QualType PType = ArgTy->getPointeeType();
    487         if ((!PType.isNull() && !PType.isConstQualified())
    488             || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
    489           State = State->add<TaintArgsOnPostVisit>(i);
    490       }
    491       continue;
    492     }
    493 
    494     // Should mark the return value?
    495     if (ArgNum == ReturnValueIndex) {
    496       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
    497       continue;
    498     }
    499 
    500     // Mark the given argument.
    501     assert(ArgNum < CE->getNumArgs());
    502     State = State->add<TaintArgsOnPostVisit>(ArgNum);
    503   }
    504 
    505   return State;
    506 }
    507 
    508 
    509 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
    510 // and arg 1 should get taint.
    511 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
    512                                                    CheckerContext &C) const {
    513   assert(CE->getNumArgs() >= 2);
    514   ProgramStateRef State = C.getState();
    515 
    516   // Check is the file descriptor is tainted.
    517   if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
    518       isStdin(CE->getArg(0), C)) {
    519     // All arguments except for the first two should get taint.
    520     for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
    521         State = State->add<TaintArgsOnPostVisit>(i);
    522     return State;
    523   }
    524 
    525   return 0;
    526 }
    527 
    528 
    529 // If argument 0(protocol domain) is network, the return value should get taint.
    530 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
    531                                                 CheckerContext &C) const {
    532   ProgramStateRef State = C.getState();
    533   if (CE->getNumArgs() < 3)
    534     return State;
    535 
    536   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
    537   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
    538   // White list the internal communication protocols.
    539   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
    540       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
    541     return State;
    542   State = State->addTaint(CE, C.getLocationContext());
    543   return State;
    544 }
    545 
    546 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
    547                                                    CheckerContext &C) const {
    548   ProgramStateRef State = C.getState();
    549   if (CE->getNumArgs() < 2)
    550     return State;
    551 
    552   SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
    553   // All arguments except for the very first one should get taint.
    554   for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
    555     // The arguments are pointer arguments. The data they are pointing at is
    556     // tainted after the call.
    557     const Expr* Arg = CE->getArg(i);
    558         SymbolRef Sym = getPointedToSymbol(C, Arg);
    559     if (Sym)
    560       State = State->addTaint(Sym);
    561   }
    562   return State;
    563 }
    564 
    565 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
    566                                                   CheckerContext &C) const {
    567   return C.getState()->addTaint(CE, C.getLocationContext());
    568 }
    569 
    570 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
    571   ProgramStateRef State = C.getState();
    572   SVal Val = State->getSVal(E, C.getLocationContext());
    573 
    574   // stdin is a pointer, so it would be a region.
    575   const MemRegion *MemReg = Val.getAsRegion();
    576 
    577   // The region should be symbolic, we do not know it's value.
    578   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
    579   if (!SymReg)
    580     return false;
    581 
    582   // Get it's symbol and find the declaration region it's pointing to.
    583   const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
    584   if (!Sm)
    585     return false;
    586   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
    587   if (!DeclReg)
    588     return false;
    589 
    590   // This region corresponds to a declaration, find out if it's a global/extern
    591   // variable named stdin with the proper type.
    592   if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
    593     D = D->getCanonicalDecl();
    594     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
    595         if (const PointerType * PtrTy =
    596               dyn_cast<PointerType>(D->getType().getTypePtr()))
    597           if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
    598             return true;
    599   }
    600   return false;
    601 }
    602 
    603 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
    604                                        const CheckerContext &C,
    605                                        unsigned int &ArgNum) {
    606   // Find if the function contains a format string argument.
    607   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
    608   // vsnprintf, syslog, custom annotated functions.
    609   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
    610   if (!FDecl)
    611     return false;
    612   for (specific_attr_iterator<FormatAttr>
    613          i = FDecl->specific_attr_begin<FormatAttr>(),
    614          e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
    615 
    616     const FormatAttr *Format = *i;
    617     ArgNum = Format->getFormatIdx() - 1;
    618     if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
    619       return true;
    620   }
    621 
    622   // Or if a function is named setproctitle (this is a heuristic).
    623   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
    624     ArgNum = 0;
    625     return true;
    626   }
    627 
    628   return false;
    629 }
    630 
    631 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
    632                                                   const char Msg[],
    633                                                   CheckerContext &C) const {
    634   assert(E);
    635 
    636   // Check for taint.
    637   ProgramStateRef State = C.getState();
    638   if (!State->isTainted(getPointedToSymbol(C, E)) &&
    639       !State->isTainted(E, C.getLocationContext()))
    640     return false;
    641 
    642   // Generate diagnostic.
    643   if (ExplodedNode *N = C.addTransition()) {
    644     initBugType();
    645     BugReport *report = new BugReport(*BT, Msg, N);
    646     report->addRange(E->getSourceRange());
    647     C.EmitReport(report);
    648     return true;
    649   }
    650   return false;
    651 }
    652 
    653 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
    654                                                         CheckerContext &C) const{
    655   // Check if the function contains a format string argument.
    656   unsigned int ArgNum = 0;
    657   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
    658     return false;
    659 
    660   // If either the format string content or the pointer itself are tainted, warn.
    661   if (generateReportIfTainted(CE->getArg(ArgNum),
    662                               MsgUncontrolledFormatString, C))
    663     return true;
    664   return false;
    665 }
    666 
    667 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
    668                                           StringRef Name,
    669                                           CheckerContext &C) const {
    670   // TODO: It might make sense to run this check on demand. In some cases,
    671   // we should check if the environment has been cleansed here. We also might
    672   // need to know if the user was reset before these calls(seteuid).
    673   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
    674     .Case("system", 0)
    675     .Case("popen", 0)
    676     .Case("execl", 0)
    677     .Case("execle", 0)
    678     .Case("execlp", 0)
    679     .Case("execv", 0)
    680     .Case("execvp", 0)
    681     .Case("execvP", 0)
    682     .Case("execve", 0)
    683     .Case("dlopen", 0)
    684     .Default(UINT_MAX);
    685 
    686   if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
    687     return false;
    688 
    689   if (generateReportIfTainted(CE->getArg(ArgNum),
    690                               MsgSanitizeSystemArgs, C))
    691     return true;
    692 
    693   return false;
    694 }
    695 
    696 // TODO: Should this check be a part of the CString checker?
    697 // If yes, should taint be a global setting?
    698 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
    699                                                  const FunctionDecl *FDecl,
    700                                                  CheckerContext &C) const {
    701   // If the function has a buffer size argument, set ArgNum.
    702   unsigned ArgNum = InvalidArgIndex;
    703   unsigned BId = 0;
    704   if ( (BId = FDecl->getMemoryFunctionKind()) )
    705     switch(BId) {
    706     case Builtin::BImemcpy:
    707     case Builtin::BImemmove:
    708     case Builtin::BIstrncpy:
    709       ArgNum = 2;
    710       break;
    711     case Builtin::BIstrndup:
    712       ArgNum = 1;
    713       break;
    714     default:
    715       break;
    716     };
    717 
    718   if (ArgNum == InvalidArgIndex) {
    719     if (C.isCLibraryFunction(FDecl, "malloc") ||
    720         C.isCLibraryFunction(FDecl, "calloc") ||
    721         C.isCLibraryFunction(FDecl, "alloca"))
    722       ArgNum = 0;
    723     else if (C.isCLibraryFunction(FDecl, "memccpy"))
    724       ArgNum = 3;
    725     else if (C.isCLibraryFunction(FDecl, "realloc"))
    726       ArgNum = 1;
    727     else if (C.isCLibraryFunction(FDecl, "bcopy"))
    728       ArgNum = 2;
    729   }
    730 
    731   if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
    732       generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
    733     return true;
    734 
    735   return false;
    736 }
    737 
    738 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
    739   mgr.registerChecker<GenericTaintChecker>();
    740 }
    741