1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This checker defines the attack surface for generic taint propagation. 11 // 12 // The taint information produced by it might be useful to other checkers. For 13 // example, checkers should report errors which involve tainted data more 14 // aggressively, even if the involved symbols are under constrained. 15 // 16 //===----------------------------------------------------------------------===// 17 #include "ClangSACheckers.h" 18 #include "clang/StaticAnalyzer/Core/Checker.h" 19 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 20 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 #include "clang/Basic/Builtins.h" 24 #include <climits> 25 26 using namespace clang; 27 using namespace ento; 28 29 namespace { 30 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 31 check::PreStmt<CallExpr> > { 32 public: 33 static void *getTag() { static int Tag; return &Tag; } 34 35 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 36 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const; 37 38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 39 40 private: 41 static const unsigned InvalidArgIndex = UINT_MAX; 42 /// Denotes the return vale. 43 static const unsigned ReturnValueIndex = UINT_MAX - 1; 44 45 mutable OwningPtr<BugType> BT; 46 inline void initBugType() const { 47 if (!BT) 48 BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data")); 49 } 50 51 /// \brief Catch taint related bugs. Check if tainted data is passed to a 52 /// system call etc. 53 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 54 55 /// \brief Add taint sources on a pre-visit. 56 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 57 58 /// \brief Propagate taint generated at pre-visit. 59 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 60 61 /// \brief Add taint sources on a post visit. 62 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 63 64 /// Check if the region the expression evaluates to is the standard input, 65 /// and thus, is tainted. 66 static bool isStdin(const Expr *E, CheckerContext &C); 67 68 /// \brief Given a pointer argument, get the symbol of the value it contains 69 /// (points to). 70 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); 71 72 /// Functions defining the attack surface. 73 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, 74 CheckerContext &C) const; 75 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 76 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 77 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 78 79 /// Taint the scanned input if the file is tainted. 80 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 81 82 /// Check for CWE-134: Uncontrolled Format String. 83 static const char MsgUncontrolledFormatString[]; 84 bool checkUncontrolledFormatString(const CallExpr *CE, 85 CheckerContext &C) const; 86 87 /// Check for: 88 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 89 /// CWE-78, "Failure to Sanitize Data into an OS Command" 90 static const char MsgSanitizeSystemArgs[]; 91 bool checkSystemCall(const CallExpr *CE, StringRef Name, 92 CheckerContext &C) const; 93 94 /// Check if tainted data is used as a buffer size ins strn.. functions, 95 /// and allocators. 96 static const char MsgTaintedBufferSize[]; 97 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 98 CheckerContext &C) const; 99 100 /// Generate a report if the expression is tainted or points to tainted data. 101 bool generateReportIfTainted(const Expr *E, const char Msg[], 102 CheckerContext &C) const; 103 104 105 typedef llvm::SmallVector<unsigned, 2> ArgVector; 106 107 /// \brief A struct used to specify taint propagation rules for a function. 108 /// 109 /// If any of the possible taint source arguments is tainted, all of the 110 /// destination arguments should also be tainted. Use InvalidArgIndex in the 111 /// src list to specify that all of the arguments can introduce taint. Use 112 /// InvalidArgIndex in the dst arguments to signify that all the non-const 113 /// pointer and reference arguments might be tainted on return. If 114 /// ReturnValueIndex is added to the dst list, the return value will be 115 /// tainted. 116 struct TaintPropagationRule { 117 /// List of arguments which can be taint sources and should be checked. 118 ArgVector SrcArgs; 119 /// List of arguments which should be tainted on function return. 120 ArgVector DstArgs; 121 // TODO: Check if using other data structures would be more optimal. 122 123 TaintPropagationRule() {} 124 125 TaintPropagationRule(unsigned SArg, 126 unsigned DArg, bool TaintRet = false) { 127 SrcArgs.push_back(SArg); 128 DstArgs.push_back(DArg); 129 if (TaintRet) 130 DstArgs.push_back(ReturnValueIndex); 131 } 132 133 TaintPropagationRule(unsigned SArg1, unsigned SArg2, 134 unsigned DArg, bool TaintRet = false) { 135 SrcArgs.push_back(SArg1); 136 SrcArgs.push_back(SArg2); 137 DstArgs.push_back(DArg); 138 if (TaintRet) 139 DstArgs.push_back(ReturnValueIndex); 140 } 141 142 /// Get the propagation rule for a given function. 143 static TaintPropagationRule 144 getTaintPropagationRule(const FunctionDecl *FDecl, 145 StringRef Name, 146 CheckerContext &C); 147 148 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 149 inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 150 151 inline bool isNull() const { return SrcArgs.empty(); } 152 153 inline bool isDestinationArgument(unsigned ArgNum) const { 154 return (std::find(DstArgs.begin(), 155 DstArgs.end(), ArgNum) != DstArgs.end()); 156 } 157 158 static inline bool isTaintedOrPointsToTainted(const Expr *E, 159 ProgramStateRef State, 160 CheckerContext &C) { 161 return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) || 162 (E->getType().getTypePtr()->isPointerType() && 163 State->isTainted(getPointedToSymbol(C, E)))); 164 } 165 166 /// \brief Pre-process a function which propagates taint according to the 167 /// taint rule. 168 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 169 170 }; 171 }; 172 173 const unsigned GenericTaintChecker::ReturnValueIndex; 174 const unsigned GenericTaintChecker::InvalidArgIndex; 175 176 const char GenericTaintChecker::MsgUncontrolledFormatString[] = 177 "Untrusted data is used as a format string " 178 "(CWE-134: Uncontrolled Format String)"; 179 180 const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 181 "Untrusted data is passed to a system call " 182 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 183 184 const char GenericTaintChecker::MsgTaintedBufferSize[] = 185 "Untrusted data is used to specify the buffer size " 186 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 187 "character data and the null terminator)"; 188 189 } // end of anonymous namespace 190 191 /// A set which is used to pass information from call pre-visit instruction 192 /// to the call post-visit. The values are unsigned integers, which are either 193 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 194 /// points to data, which should be tainted on return. 195 namespace { struct TaintArgsOnPostVisit{}; } 196 namespace clang { namespace ento { 197 template<> struct ProgramStateTrait<TaintArgsOnPostVisit> 198 : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > { 199 static void *GDMIndex() { return GenericTaintChecker::getTag(); } 200 }; 201 }} 202 203 GenericTaintChecker::TaintPropagationRule 204 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 205 const FunctionDecl *FDecl, 206 StringRef Name, 207 CheckerContext &C) { 208 // TODO: Currently, we might loose precision here: we always mark a return 209 // value as tainted even if it's just a pointer, pointing to tainted data. 210 211 // Check for exact name match for functions without builtin substitutes. 212 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) 213 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 214 .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 215 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 216 .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) 217 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) 218 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) 219 .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) 220 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) 221 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) 222 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) 223 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) 224 .Case("read", TaintPropagationRule(0, 2, 1, true)) 225 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) 226 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) 227 .Case("fgets", TaintPropagationRule(2, 0, true)) 228 .Case("getline", TaintPropagationRule(2, 0)) 229 .Case("getdelim", TaintPropagationRule(3, 0)) 230 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) 231 .Default(TaintPropagationRule()); 232 233 if (!Rule.isNull()) 234 return Rule; 235 236 // Check if it's one of the memory setting/copying functions. 237 // This check is specialized but faster then calling isCLibraryFunction. 238 unsigned BId = 0; 239 if ( (BId = FDecl->getMemoryFunctionKind()) ) 240 switch(BId) { 241 case Builtin::BImemcpy: 242 case Builtin::BImemmove: 243 case Builtin::BIstrncpy: 244 case Builtin::BIstrncat: 245 return TaintPropagationRule(1, 2, 0, true); 246 case Builtin::BIstrlcpy: 247 case Builtin::BIstrlcat: 248 return TaintPropagationRule(1, 2, 0, false); 249 case Builtin::BIstrndup: 250 return TaintPropagationRule(0, 1, ReturnValueIndex); 251 252 default: 253 break; 254 }; 255 256 // Process all other functions which could be defined as builtins. 257 if (Rule.isNull()) { 258 if (C.isCLibraryFunction(FDecl, "snprintf") || 259 C.isCLibraryFunction(FDecl, "sprintf")) 260 return TaintPropagationRule(InvalidArgIndex, 0, true); 261 else if (C.isCLibraryFunction(FDecl, "strcpy") || 262 C.isCLibraryFunction(FDecl, "stpcpy") || 263 C.isCLibraryFunction(FDecl, "strcat")) 264 return TaintPropagationRule(1, 0, true); 265 else if (C.isCLibraryFunction(FDecl, "bcopy")) 266 return TaintPropagationRule(0, 2, 1, false); 267 else if (C.isCLibraryFunction(FDecl, "strdup") || 268 C.isCLibraryFunction(FDecl, "strdupa")) 269 return TaintPropagationRule(0, ReturnValueIndex); 270 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 271 return TaintPropagationRule(0, ReturnValueIndex); 272 } 273 274 // Skipping the following functions, since they might be used for cleansing 275 // or smart memory copy: 276 // - memccpy - copying untill hitting a special character. 277 278 return TaintPropagationRule(); 279 } 280 281 void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 282 CheckerContext &C) const { 283 // Check for errors first. 284 if (checkPre(CE, C)) 285 return; 286 287 // Add taint second. 288 addSourcesPre(CE, C); 289 } 290 291 void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 292 CheckerContext &C) const { 293 if (propagateFromPre(CE, C)) 294 return; 295 addSourcesPost(CE, C); 296 } 297 298 void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 299 CheckerContext &C) const { 300 ProgramStateRef State = 0; 301 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 302 StringRef Name = C.getCalleeName(FDecl); 303 if (Name.empty()) 304 return; 305 306 // First, try generating a propagation rule for this function. 307 TaintPropagationRule Rule = 308 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 309 if (!Rule.isNull()) { 310 State = Rule.process(CE, C); 311 if (!State) 312 return; 313 C.addTransition(State); 314 return; 315 } 316 317 // Otherwise, check if we have custom pre-processing implemented. 318 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 319 .Case("fscanf", &GenericTaintChecker::preFscanf) 320 .Default(0); 321 // Check and evaluate the call. 322 if (evalFunction) 323 State = (this->*evalFunction)(CE, C); 324 if (!State) 325 return; 326 C.addTransition(State); 327 328 } 329 330 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 331 CheckerContext &C) const { 332 ProgramStateRef State = C.getState(); 333 334 // Depending on what was tainted at pre-visit, we determined a set of 335 // arguments which should be tainted after the function returns. These are 336 // stored in the state as TaintArgsOnPostVisit set. 337 llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>(); 338 if (TaintArgs.isEmpty()) 339 return false; 340 341 for (llvm::ImmutableSet<unsigned>::iterator 342 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 343 unsigned ArgNum = *I; 344 345 // Special handling for the tainted return value. 346 if (ArgNum == ReturnValueIndex) { 347 State = State->addTaint(CE, C.getLocationContext()); 348 continue; 349 } 350 351 // The arguments are pointer arguments. The data they are pointing at is 352 // tainted after the call. 353 if (CE->getNumArgs() < (ArgNum + 1)) 354 return false; 355 const Expr* Arg = CE->getArg(ArgNum); 356 SymbolRef Sym = getPointedToSymbol(C, Arg); 357 if (Sym) 358 State = State->addTaint(Sym); 359 } 360 361 // Clear up the taint info from the state. 362 State = State->remove<TaintArgsOnPostVisit>(); 363 364 if (State != C.getState()) { 365 C.addTransition(State); 366 return true; 367 } 368 return false; 369 } 370 371 void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 372 CheckerContext &C) const { 373 // Define the attack surface. 374 // Set the evaluation function by switching on the callee name. 375 StringRef Name = C.getCalleeName(CE); 376 if (Name.empty()) 377 return; 378 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 379 .Case("scanf", &GenericTaintChecker::postScanf) 380 // TODO: Add support for vfscanf & family. 381 .Case("getchar", &GenericTaintChecker::postRetTaint) 382 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 383 .Case("getenv", &GenericTaintChecker::postRetTaint) 384 .Case("fopen", &GenericTaintChecker::postRetTaint) 385 .Case("fdopen", &GenericTaintChecker::postRetTaint) 386 .Case("freopen", &GenericTaintChecker::postRetTaint) 387 .Case("getch", &GenericTaintChecker::postRetTaint) 388 .Case("wgetch", &GenericTaintChecker::postRetTaint) 389 .Case("socket", &GenericTaintChecker::postSocket) 390 .Default(0); 391 392 // If the callee isn't defined, it is not of security concern. 393 // Check and evaluate the call. 394 ProgramStateRef State = 0; 395 if (evalFunction) 396 State = (this->*evalFunction)(CE, C); 397 if (!State) 398 return; 399 400 C.addTransition(State); 401 } 402 403 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 404 405 if (checkUncontrolledFormatString(CE, C)) 406 return true; 407 408 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 409 StringRef Name = C.getCalleeName(FDecl); 410 if (Name.empty()) 411 return false; 412 413 if (checkSystemCall(CE, Name, C)) 414 return true; 415 416 if (checkTaintedBufferSize(CE, FDecl, C)) 417 return true; 418 419 return false; 420 } 421 422 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, 423 const Expr* Arg) { 424 ProgramStateRef State = C.getState(); 425 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 426 if (AddrVal.isUnknownOrUndef()) 427 return 0; 428 429 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal); 430 if (!AddrLoc) 431 return 0; 432 433 const PointerType *ArgTy = 434 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); 435 SVal Val = State->getSVal(*AddrLoc, 436 ArgTy ? ArgTy->getPointeeType(): QualType()); 437 return Val.getAsSymbol(); 438 } 439 440 ProgramStateRef 441 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 442 CheckerContext &C) const { 443 ProgramStateRef State = C.getState(); 444 445 // Check for taint in arguments. 446 bool IsTainted = false; 447 for (ArgVector::const_iterator I = SrcArgs.begin(), 448 E = SrcArgs.end(); I != E; ++I) { 449 unsigned ArgNum = *I; 450 451 if (ArgNum == InvalidArgIndex) { 452 // Check if any of the arguments is tainted, but skip the 453 // destination arguments. 454 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 455 if (isDestinationArgument(i)) 456 continue; 457 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 458 break; 459 } 460 break; 461 } 462 463 if (CE->getNumArgs() < (ArgNum + 1)) 464 return State; 465 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 466 break; 467 } 468 if (!IsTainted) 469 return State; 470 471 // Mark the arguments which should be tainted after the function returns. 472 for (ArgVector::const_iterator I = DstArgs.begin(), 473 E = DstArgs.end(); I != E; ++I) { 474 unsigned ArgNum = *I; 475 476 // Should we mark all arguments as tainted? 477 if (ArgNum == InvalidArgIndex) { 478 // For all pointer and references that were passed in: 479 // If they are not pointing to const data, mark data as tainted. 480 // TODO: So far we are just going one level down; ideally we'd need to 481 // recurse here. 482 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 483 const Expr *Arg = CE->getArg(i); 484 // Process pointer argument. 485 const Type *ArgTy = Arg->getType().getTypePtr(); 486 QualType PType = ArgTy->getPointeeType(); 487 if ((!PType.isNull() && !PType.isConstQualified()) 488 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 489 State = State->add<TaintArgsOnPostVisit>(i); 490 } 491 continue; 492 } 493 494 // Should mark the return value? 495 if (ArgNum == ReturnValueIndex) { 496 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 497 continue; 498 } 499 500 // Mark the given argument. 501 assert(ArgNum < CE->getNumArgs()); 502 State = State->add<TaintArgsOnPostVisit>(ArgNum); 503 } 504 505 return State; 506 } 507 508 509 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0 510 // and arg 1 should get taint. 511 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 512 CheckerContext &C) const { 513 assert(CE->getNumArgs() >= 2); 514 ProgramStateRef State = C.getState(); 515 516 // Check is the file descriptor is tainted. 517 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 518 isStdin(CE->getArg(0), C)) { 519 // All arguments except for the first two should get taint. 520 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 521 State = State->add<TaintArgsOnPostVisit>(i); 522 return State; 523 } 524 525 return 0; 526 } 527 528 529 // If argument 0(protocol domain) is network, the return value should get taint. 530 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 531 CheckerContext &C) const { 532 ProgramStateRef State = C.getState(); 533 if (CE->getNumArgs() < 3) 534 return State; 535 536 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 537 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 538 // White list the internal communication protocols. 539 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 540 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 541 return State; 542 State = State->addTaint(CE, C.getLocationContext()); 543 return State; 544 } 545 546 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 547 CheckerContext &C) const { 548 ProgramStateRef State = C.getState(); 549 if (CE->getNumArgs() < 2) 550 return State; 551 552 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext()); 553 // All arguments except for the very first one should get taint. 554 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 555 // The arguments are pointer arguments. The data they are pointing at is 556 // tainted after the call. 557 const Expr* Arg = CE->getArg(i); 558 SymbolRef Sym = getPointedToSymbol(C, Arg); 559 if (Sym) 560 State = State->addTaint(Sym); 561 } 562 return State; 563 } 564 565 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 566 CheckerContext &C) const { 567 return C.getState()->addTaint(CE, C.getLocationContext()); 568 } 569 570 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 571 ProgramStateRef State = C.getState(); 572 SVal Val = State->getSVal(E, C.getLocationContext()); 573 574 // stdin is a pointer, so it would be a region. 575 const MemRegion *MemReg = Val.getAsRegion(); 576 577 // The region should be symbolic, we do not know it's value. 578 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 579 if (!SymReg) 580 return false; 581 582 // Get it's symbol and find the declaration region it's pointing to. 583 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 584 if (!Sm) 585 return false; 586 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 587 if (!DeclReg) 588 return false; 589 590 // This region corresponds to a declaration, find out if it's a global/extern 591 // variable named stdin with the proper type. 592 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 593 D = D->getCanonicalDecl(); 594 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 595 if (const PointerType * PtrTy = 596 dyn_cast<PointerType>(D->getType().getTypePtr())) 597 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 598 return true; 599 } 600 return false; 601 } 602 603 static bool getPrintfFormatArgumentNum(const CallExpr *CE, 604 const CheckerContext &C, 605 unsigned int &ArgNum) { 606 // Find if the function contains a format string argument. 607 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 608 // vsnprintf, syslog, custom annotated functions. 609 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 610 if (!FDecl) 611 return false; 612 for (specific_attr_iterator<FormatAttr> 613 i = FDecl->specific_attr_begin<FormatAttr>(), 614 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) { 615 616 const FormatAttr *Format = *i; 617 ArgNum = Format->getFormatIdx() - 1; 618 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum) 619 return true; 620 } 621 622 // Or if a function is named setproctitle (this is a heuristic). 623 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 624 ArgNum = 0; 625 return true; 626 } 627 628 return false; 629 } 630 631 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 632 const char Msg[], 633 CheckerContext &C) const { 634 assert(E); 635 636 // Check for taint. 637 ProgramStateRef State = C.getState(); 638 if (!State->isTainted(getPointedToSymbol(C, E)) && 639 !State->isTainted(E, C.getLocationContext())) 640 return false; 641 642 // Generate diagnostic. 643 if (ExplodedNode *N = C.addTransition()) { 644 initBugType(); 645 BugReport *report = new BugReport(*BT, Msg, N); 646 report->addRange(E->getSourceRange()); 647 C.EmitReport(report); 648 return true; 649 } 650 return false; 651 } 652 653 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 654 CheckerContext &C) const{ 655 // Check if the function contains a format string argument. 656 unsigned int ArgNum = 0; 657 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 658 return false; 659 660 // If either the format string content or the pointer itself are tainted, warn. 661 if (generateReportIfTainted(CE->getArg(ArgNum), 662 MsgUncontrolledFormatString, C)) 663 return true; 664 return false; 665 } 666 667 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, 668 StringRef Name, 669 CheckerContext &C) const { 670 // TODO: It might make sense to run this check on demand. In some cases, 671 // we should check if the environment has been cleansed here. We also might 672 // need to know if the user was reset before these calls(seteuid). 673 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 674 .Case("system", 0) 675 .Case("popen", 0) 676 .Case("execl", 0) 677 .Case("execle", 0) 678 .Case("execlp", 0) 679 .Case("execv", 0) 680 .Case("execvp", 0) 681 .Case("execvP", 0) 682 .Case("execve", 0) 683 .Case("dlopen", 0) 684 .Default(UINT_MAX); 685 686 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 687 return false; 688 689 if (generateReportIfTainted(CE->getArg(ArgNum), 690 MsgSanitizeSystemArgs, C)) 691 return true; 692 693 return false; 694 } 695 696 // TODO: Should this check be a part of the CString checker? 697 // If yes, should taint be a global setting? 698 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 699 const FunctionDecl *FDecl, 700 CheckerContext &C) const { 701 // If the function has a buffer size argument, set ArgNum. 702 unsigned ArgNum = InvalidArgIndex; 703 unsigned BId = 0; 704 if ( (BId = FDecl->getMemoryFunctionKind()) ) 705 switch(BId) { 706 case Builtin::BImemcpy: 707 case Builtin::BImemmove: 708 case Builtin::BIstrncpy: 709 ArgNum = 2; 710 break; 711 case Builtin::BIstrndup: 712 ArgNum = 1; 713 break; 714 default: 715 break; 716 }; 717 718 if (ArgNum == InvalidArgIndex) { 719 if (C.isCLibraryFunction(FDecl, "malloc") || 720 C.isCLibraryFunction(FDecl, "calloc") || 721 C.isCLibraryFunction(FDecl, "alloca")) 722 ArgNum = 0; 723 else if (C.isCLibraryFunction(FDecl, "memccpy")) 724 ArgNum = 3; 725 else if (C.isCLibraryFunction(FDecl, "realloc")) 726 ArgNum = 1; 727 else if (C.isCLibraryFunction(FDecl, "bcopy")) 728 ArgNum = 2; 729 } 730 731 if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 732 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C)) 733 return true; 734 735 return false; 736 } 737 738 void ento::registerGenericTaintChecker(CheckerManager &mgr) { 739 mgr.registerChecker<GenericTaintChecker>(); 740 } 741