1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This checker defines the attack surface for generic taint propagation. 11 // 12 // The taint information produced by it might be useful to other checkers. For 13 // example, checkers should report errors which involve tainted data more 14 // aggressively, even if the involved symbols are under constrained. 15 // 16 //===----------------------------------------------------------------------===// 17 #include "ClangSACheckers.h" 18 #include "clang/StaticAnalyzer/Core/Checker.h" 19 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 20 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 #include "clang/Basic/Builtins.h" 24 #include <climits> 25 26 using namespace clang; 27 using namespace ento; 28 29 namespace { 30 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 31 check::PreStmt<CallExpr> > { 32 public: 33 static void *getTag() { static int Tag; return &Tag; } 34 35 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 36 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const; 37 38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 39 40 private: 41 static const unsigned InvalidArgIndex = UINT_MAX; 42 /// Denotes the return vale. 43 static const unsigned ReturnValueIndex = UINT_MAX - 1; 44 45 mutable OwningPtr<BugType> BT; 46 inline void initBugType() const { 47 if (!BT) 48 BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data")); 49 } 50 51 /// \brief Catch taint related bugs. Check if tainted data is passed to a 52 /// system call etc. 53 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 54 55 /// \brief Add taint sources on a pre-visit. 56 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 57 58 /// \brief Propagate taint generated at pre-visit. 59 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 60 61 /// \brief Add taint sources on a post visit. 62 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 63 64 /// Check if the region the expression evaluates to is the standard input, 65 /// and thus, is tainted. 66 static bool isStdin(const Expr *E, CheckerContext &C); 67 68 /// \brief Given a pointer argument, get the symbol of the value it contains 69 /// (points to). 70 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); 71 72 /// Functions defining the attack surface. 73 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, 74 CheckerContext &C) const; 75 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 76 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 77 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 78 79 /// Taint the scanned input if the file is tainted. 80 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 81 82 /// Check for CWE-134: Uncontrolled Format String. 83 static const char MsgUncontrolledFormatString[]; 84 bool checkUncontrolledFormatString(const CallExpr *CE, 85 CheckerContext &C) const; 86 87 /// Check for: 88 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 89 /// CWE-78, "Failure to Sanitize Data into an OS Command" 90 static const char MsgSanitizeSystemArgs[]; 91 bool checkSystemCall(const CallExpr *CE, StringRef Name, 92 CheckerContext &C) const; 93 94 /// Check if tainted data is used as a buffer size ins strn.. functions, 95 /// and allocators. 96 static const char MsgTaintedBufferSize[]; 97 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 98 CheckerContext &C) const; 99 100 /// Generate a report if the expression is tainted or points to tainted data. 101 bool generateReportIfTainted(const Expr *E, const char Msg[], 102 CheckerContext &C) const; 103 104 105 typedef llvm::SmallVector<unsigned, 2> ArgVector; 106 107 /// \brief A struct used to specify taint propagation rules for a function. 108 /// 109 /// If any of the possible taint source arguments is tainted, all of the 110 /// destination arguments should also be tainted. Use InvalidArgIndex in the 111 /// src list to specify that all of the arguments can introduce taint. Use 112 /// InvalidArgIndex in the dst arguments to signify that all the non-const 113 /// pointer and reference arguments might be tainted on return. If 114 /// ReturnValueIndex is added to the dst list, the return value will be 115 /// tainted. 116 struct TaintPropagationRule { 117 /// List of arguments which can be taint sources and should be checked. 118 ArgVector SrcArgs; 119 /// List of arguments which should be tainted on function return. 120 ArgVector DstArgs; 121 // TODO: Check if using other data structures would be more optimal. 122 123 TaintPropagationRule() {} 124 125 TaintPropagationRule(unsigned SArg, 126 unsigned DArg, bool TaintRet = false) { 127 SrcArgs.push_back(SArg); 128 DstArgs.push_back(DArg); 129 if (TaintRet) 130 DstArgs.push_back(ReturnValueIndex); 131 } 132 133 TaintPropagationRule(unsigned SArg1, unsigned SArg2, 134 unsigned DArg, bool TaintRet = false) { 135 SrcArgs.push_back(SArg1); 136 SrcArgs.push_back(SArg2); 137 DstArgs.push_back(DArg); 138 if (TaintRet) 139 DstArgs.push_back(ReturnValueIndex); 140 } 141 142 /// Get the propagation rule for a given function. 143 static TaintPropagationRule 144 getTaintPropagationRule(const FunctionDecl *FDecl, 145 StringRef Name, 146 CheckerContext &C); 147 148 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 149 inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 150 151 inline bool isNull() const { return SrcArgs.empty(); } 152 153 inline bool isDestinationArgument(unsigned ArgNum) const { 154 return (std::find(DstArgs.begin(), 155 DstArgs.end(), ArgNum) != DstArgs.end()); 156 } 157 158 static inline bool isTaintedOrPointsToTainted(const Expr *E, 159 ProgramStateRef State, 160 CheckerContext &C) { 161 return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) || 162 (E->getType().getTypePtr()->isPointerType() && 163 State->isTainted(getPointedToSymbol(C, E)))); 164 } 165 166 /// \brief Pre-process a function which propagates taint according to the 167 /// taint rule. 168 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 169 170 }; 171 }; 172 173 const unsigned GenericTaintChecker::ReturnValueIndex; 174 const unsigned GenericTaintChecker::InvalidArgIndex; 175 176 const char GenericTaintChecker::MsgUncontrolledFormatString[] = 177 "Untrusted data is used as a format string " 178 "(CWE-134: Uncontrolled Format String)"; 179 180 const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 181 "Untrusted data is passed to a system call " 182 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 183 184 const char GenericTaintChecker::MsgTaintedBufferSize[] = 185 "Untrusted data is used to specify the buffer size " 186 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 187 "character data and the null terminator)"; 188 189 } // end of anonymous namespace 190 191 /// A set which is used to pass information from call pre-visit instruction 192 /// to the call post-visit. The values are unsigned integers, which are either 193 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 194 /// points to data, which should be tainted on return. 195 namespace { struct TaintArgsOnPostVisit{}; } 196 namespace clang { namespace ento { 197 template<> struct ProgramStateTrait<TaintArgsOnPostVisit> 198 : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > { 199 static void *GDMIndex() { return GenericTaintChecker::getTag(); } 200 }; 201 }} 202 203 GenericTaintChecker::TaintPropagationRule 204 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 205 const FunctionDecl *FDecl, 206 StringRef Name, 207 CheckerContext &C) { 208 // TODO: Currently, we might loose precision here: we always mark a return 209 // value as tainted even if it's just a pointer, pointing to tainted data. 210 211 // Check for exact name match for functions without builtin substitutes. 212 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) 213 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 214 .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 215 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 216 .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) 217 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) 218 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) 219 .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) 220 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) 221 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) 222 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) 223 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) 224 .Case("read", TaintPropagationRule(0, 2, 1, true)) 225 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) 226 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) 227 .Case("fgets", TaintPropagationRule(2, 0, true)) 228 .Case("getline", TaintPropagationRule(2, 0)) 229 .Case("getdelim", TaintPropagationRule(3, 0)) 230 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) 231 .Default(TaintPropagationRule()); 232 233 if (!Rule.isNull()) 234 return Rule; 235 236 // Check if it's one of the memory setting/copying functions. 237 // This check is specialized but faster then calling isCLibraryFunction. 238 unsigned BId = 0; 239 if ( (BId = FDecl->getMemoryFunctionKind()) ) 240 switch(BId) { 241 case Builtin::BImemcpy: 242 case Builtin::BImemmove: 243 case Builtin::BIstrncpy: 244 case Builtin::BIstrncat: 245 return TaintPropagationRule(1, 2, 0, true); 246 case Builtin::BIstrlcpy: 247 case Builtin::BIstrlcat: 248 return TaintPropagationRule(1, 2, 0, false); 249 case Builtin::BIstrndup: 250 return TaintPropagationRule(0, 1, ReturnValueIndex); 251 252 default: 253 break; 254 }; 255 256 // Process all other functions which could be defined as builtins. 257 if (Rule.isNull()) { 258 if (C.isCLibraryFunction(FDecl, "snprintf") || 259 C.isCLibraryFunction(FDecl, "sprintf")) 260 return TaintPropagationRule(InvalidArgIndex, 0, true); 261 else if (C.isCLibraryFunction(FDecl, "strcpy") || 262 C.isCLibraryFunction(FDecl, "stpcpy") || 263 C.isCLibraryFunction(FDecl, "strcat")) 264 return TaintPropagationRule(1, 0, true); 265 else if (C.isCLibraryFunction(FDecl, "bcopy")) 266 return TaintPropagationRule(0, 2, 1, false); 267 else if (C.isCLibraryFunction(FDecl, "strdup") || 268 C.isCLibraryFunction(FDecl, "strdupa")) 269 return TaintPropagationRule(0, ReturnValueIndex); 270 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 271 return TaintPropagationRule(0, ReturnValueIndex); 272 } 273 274 // Skipping the following functions, since they might be used for cleansing 275 // or smart memory copy: 276 // - memccpy - copying until hitting a special character. 277 278 return TaintPropagationRule(); 279 } 280 281 void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 282 CheckerContext &C) const { 283 // Check for errors first. 284 if (checkPre(CE, C)) 285 return; 286 287 // Add taint second. 288 addSourcesPre(CE, C); 289 } 290 291 void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 292 CheckerContext &C) const { 293 if (propagateFromPre(CE, C)) 294 return; 295 addSourcesPost(CE, C); 296 } 297 298 void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 299 CheckerContext &C) const { 300 ProgramStateRef State = 0; 301 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 302 if (!FDecl || FDecl->getKind() != Decl::Function) 303 return; 304 305 StringRef Name = C.getCalleeName(FDecl); 306 if (Name.empty()) 307 return; 308 309 // First, try generating a propagation rule for this function. 310 TaintPropagationRule Rule = 311 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 312 if (!Rule.isNull()) { 313 State = Rule.process(CE, C); 314 if (!State) 315 return; 316 C.addTransition(State); 317 return; 318 } 319 320 // Otherwise, check if we have custom pre-processing implemented. 321 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 322 .Case("fscanf", &GenericTaintChecker::preFscanf) 323 .Default(0); 324 // Check and evaluate the call. 325 if (evalFunction) 326 State = (this->*evalFunction)(CE, C); 327 if (!State) 328 return; 329 C.addTransition(State); 330 331 } 332 333 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 334 CheckerContext &C) const { 335 ProgramStateRef State = C.getState(); 336 337 // Depending on what was tainted at pre-visit, we determined a set of 338 // arguments which should be tainted after the function returns. These are 339 // stored in the state as TaintArgsOnPostVisit set. 340 llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>(); 341 if (TaintArgs.isEmpty()) 342 return false; 343 344 for (llvm::ImmutableSet<unsigned>::iterator 345 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 346 unsigned ArgNum = *I; 347 348 // Special handling for the tainted return value. 349 if (ArgNum == ReturnValueIndex) { 350 State = State->addTaint(CE, C.getLocationContext()); 351 continue; 352 } 353 354 // The arguments are pointer arguments. The data they are pointing at is 355 // tainted after the call. 356 if (CE->getNumArgs() < (ArgNum + 1)) 357 return false; 358 const Expr* Arg = CE->getArg(ArgNum); 359 SymbolRef Sym = getPointedToSymbol(C, Arg); 360 if (Sym) 361 State = State->addTaint(Sym); 362 } 363 364 // Clear up the taint info from the state. 365 State = State->remove<TaintArgsOnPostVisit>(); 366 367 if (State != C.getState()) { 368 C.addTransition(State); 369 return true; 370 } 371 return false; 372 } 373 374 void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 375 CheckerContext &C) const { 376 // Define the attack surface. 377 // Set the evaluation function by switching on the callee name. 378 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 379 if (!FDecl || FDecl->getKind() != Decl::Function) 380 return; 381 382 StringRef Name = C.getCalleeName(FDecl); 383 if (Name.empty()) 384 return; 385 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 386 .Case("scanf", &GenericTaintChecker::postScanf) 387 // TODO: Add support for vfscanf & family. 388 .Case("getchar", &GenericTaintChecker::postRetTaint) 389 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 390 .Case("getenv", &GenericTaintChecker::postRetTaint) 391 .Case("fopen", &GenericTaintChecker::postRetTaint) 392 .Case("fdopen", &GenericTaintChecker::postRetTaint) 393 .Case("freopen", &GenericTaintChecker::postRetTaint) 394 .Case("getch", &GenericTaintChecker::postRetTaint) 395 .Case("wgetch", &GenericTaintChecker::postRetTaint) 396 .Case("socket", &GenericTaintChecker::postSocket) 397 .Default(0); 398 399 // If the callee isn't defined, it is not of security concern. 400 // Check and evaluate the call. 401 ProgramStateRef State = 0; 402 if (evalFunction) 403 State = (this->*evalFunction)(CE, C); 404 if (!State) 405 return; 406 407 C.addTransition(State); 408 } 409 410 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 411 412 if (checkUncontrolledFormatString(CE, C)) 413 return true; 414 415 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 416 if (!FDecl || FDecl->getKind() != Decl::Function) 417 return false; 418 419 StringRef Name = C.getCalleeName(FDecl); 420 if (Name.empty()) 421 return false; 422 423 if (checkSystemCall(CE, Name, C)) 424 return true; 425 426 if (checkTaintedBufferSize(CE, FDecl, C)) 427 return true; 428 429 return false; 430 } 431 432 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, 433 const Expr* Arg) { 434 ProgramStateRef State = C.getState(); 435 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 436 if (AddrVal.isUnknownOrUndef()) 437 return 0; 438 439 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal); 440 if (!AddrLoc) 441 return 0; 442 443 const PointerType *ArgTy = 444 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); 445 SVal Val = State->getSVal(*AddrLoc, 446 ArgTy ? ArgTy->getPointeeType(): QualType()); 447 return Val.getAsSymbol(); 448 } 449 450 ProgramStateRef 451 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 452 CheckerContext &C) const { 453 ProgramStateRef State = C.getState(); 454 455 // Check for taint in arguments. 456 bool IsTainted = false; 457 for (ArgVector::const_iterator I = SrcArgs.begin(), 458 E = SrcArgs.end(); I != E; ++I) { 459 unsigned ArgNum = *I; 460 461 if (ArgNum == InvalidArgIndex) { 462 // Check if any of the arguments is tainted, but skip the 463 // destination arguments. 464 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 465 if (isDestinationArgument(i)) 466 continue; 467 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 468 break; 469 } 470 break; 471 } 472 473 if (CE->getNumArgs() < (ArgNum + 1)) 474 return State; 475 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 476 break; 477 } 478 if (!IsTainted) 479 return State; 480 481 // Mark the arguments which should be tainted after the function returns. 482 for (ArgVector::const_iterator I = DstArgs.begin(), 483 E = DstArgs.end(); I != E; ++I) { 484 unsigned ArgNum = *I; 485 486 // Should we mark all arguments as tainted? 487 if (ArgNum == InvalidArgIndex) { 488 // For all pointer and references that were passed in: 489 // If they are not pointing to const data, mark data as tainted. 490 // TODO: So far we are just going one level down; ideally we'd need to 491 // recurse here. 492 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 493 const Expr *Arg = CE->getArg(i); 494 // Process pointer argument. 495 const Type *ArgTy = Arg->getType().getTypePtr(); 496 QualType PType = ArgTy->getPointeeType(); 497 if ((!PType.isNull() && !PType.isConstQualified()) 498 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 499 State = State->add<TaintArgsOnPostVisit>(i); 500 } 501 continue; 502 } 503 504 // Should mark the return value? 505 if (ArgNum == ReturnValueIndex) { 506 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 507 continue; 508 } 509 510 // Mark the given argument. 511 assert(ArgNum < CE->getNumArgs()); 512 State = State->add<TaintArgsOnPostVisit>(ArgNum); 513 } 514 515 return State; 516 } 517 518 519 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0 520 // and arg 1 should get taint. 521 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 522 CheckerContext &C) const { 523 assert(CE->getNumArgs() >= 2); 524 ProgramStateRef State = C.getState(); 525 526 // Check is the file descriptor is tainted. 527 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 528 isStdin(CE->getArg(0), C)) { 529 // All arguments except for the first two should get taint. 530 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 531 State = State->add<TaintArgsOnPostVisit>(i); 532 return State; 533 } 534 535 return 0; 536 } 537 538 539 // If argument 0(protocol domain) is network, the return value should get taint. 540 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 541 CheckerContext &C) const { 542 ProgramStateRef State = C.getState(); 543 if (CE->getNumArgs() < 3) 544 return State; 545 546 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 547 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 548 // White list the internal communication protocols. 549 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 550 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 551 return State; 552 State = State->addTaint(CE, C.getLocationContext()); 553 return State; 554 } 555 556 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 557 CheckerContext &C) const { 558 ProgramStateRef State = C.getState(); 559 if (CE->getNumArgs() < 2) 560 return State; 561 562 // All arguments except for the very first one should get taint. 563 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 564 // The arguments are pointer arguments. The data they are pointing at is 565 // tainted after the call. 566 const Expr* Arg = CE->getArg(i); 567 SymbolRef Sym = getPointedToSymbol(C, Arg); 568 if (Sym) 569 State = State->addTaint(Sym); 570 } 571 return State; 572 } 573 574 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 575 CheckerContext &C) const { 576 return C.getState()->addTaint(CE, C.getLocationContext()); 577 } 578 579 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 580 ProgramStateRef State = C.getState(); 581 SVal Val = State->getSVal(E, C.getLocationContext()); 582 583 // stdin is a pointer, so it would be a region. 584 const MemRegion *MemReg = Val.getAsRegion(); 585 586 // The region should be symbolic, we do not know it's value. 587 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 588 if (!SymReg) 589 return false; 590 591 // Get it's symbol and find the declaration region it's pointing to. 592 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 593 if (!Sm) 594 return false; 595 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 596 if (!DeclReg) 597 return false; 598 599 // This region corresponds to a declaration, find out if it's a global/extern 600 // variable named stdin with the proper type. 601 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 602 D = D->getCanonicalDecl(); 603 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 604 if (const PointerType * PtrTy = 605 dyn_cast<PointerType>(D->getType().getTypePtr())) 606 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 607 return true; 608 } 609 return false; 610 } 611 612 static bool getPrintfFormatArgumentNum(const CallExpr *CE, 613 const CheckerContext &C, 614 unsigned int &ArgNum) { 615 // Find if the function contains a format string argument. 616 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 617 // vsnprintf, syslog, custom annotated functions. 618 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 619 if (!FDecl) 620 return false; 621 for (specific_attr_iterator<FormatAttr> 622 i = FDecl->specific_attr_begin<FormatAttr>(), 623 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) { 624 625 const FormatAttr *Format = *i; 626 ArgNum = Format->getFormatIdx() - 1; 627 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum) 628 return true; 629 } 630 631 // Or if a function is named setproctitle (this is a heuristic). 632 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 633 ArgNum = 0; 634 return true; 635 } 636 637 return false; 638 } 639 640 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 641 const char Msg[], 642 CheckerContext &C) const { 643 assert(E); 644 645 // Check for taint. 646 ProgramStateRef State = C.getState(); 647 if (!State->isTainted(getPointedToSymbol(C, E)) && 648 !State->isTainted(E, C.getLocationContext())) 649 return false; 650 651 // Generate diagnostic. 652 if (ExplodedNode *N = C.addTransition()) { 653 initBugType(); 654 BugReport *report = new BugReport(*BT, Msg, N); 655 report->addRange(E->getSourceRange()); 656 C.EmitReport(report); 657 return true; 658 } 659 return false; 660 } 661 662 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 663 CheckerContext &C) const{ 664 // Check if the function contains a format string argument. 665 unsigned int ArgNum = 0; 666 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 667 return false; 668 669 // If either the format string content or the pointer itself are tainted, warn. 670 if (generateReportIfTainted(CE->getArg(ArgNum), 671 MsgUncontrolledFormatString, C)) 672 return true; 673 return false; 674 } 675 676 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, 677 StringRef Name, 678 CheckerContext &C) const { 679 // TODO: It might make sense to run this check on demand. In some cases, 680 // we should check if the environment has been cleansed here. We also might 681 // need to know if the user was reset before these calls(seteuid). 682 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 683 .Case("system", 0) 684 .Case("popen", 0) 685 .Case("execl", 0) 686 .Case("execle", 0) 687 .Case("execlp", 0) 688 .Case("execv", 0) 689 .Case("execvp", 0) 690 .Case("execvP", 0) 691 .Case("execve", 0) 692 .Case("dlopen", 0) 693 .Default(UINT_MAX); 694 695 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 696 return false; 697 698 if (generateReportIfTainted(CE->getArg(ArgNum), 699 MsgSanitizeSystemArgs, C)) 700 return true; 701 702 return false; 703 } 704 705 // TODO: Should this check be a part of the CString checker? 706 // If yes, should taint be a global setting? 707 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 708 const FunctionDecl *FDecl, 709 CheckerContext &C) const { 710 // If the function has a buffer size argument, set ArgNum. 711 unsigned ArgNum = InvalidArgIndex; 712 unsigned BId = 0; 713 if ( (BId = FDecl->getMemoryFunctionKind()) ) 714 switch(BId) { 715 case Builtin::BImemcpy: 716 case Builtin::BImemmove: 717 case Builtin::BIstrncpy: 718 ArgNum = 2; 719 break; 720 case Builtin::BIstrndup: 721 ArgNum = 1; 722 break; 723 default: 724 break; 725 }; 726 727 if (ArgNum == InvalidArgIndex) { 728 if (C.isCLibraryFunction(FDecl, "malloc") || 729 C.isCLibraryFunction(FDecl, "calloc") || 730 C.isCLibraryFunction(FDecl, "alloca")) 731 ArgNum = 0; 732 else if (C.isCLibraryFunction(FDecl, "memccpy")) 733 ArgNum = 3; 734 else if (C.isCLibraryFunction(FDecl, "realloc")) 735 ArgNum = 1; 736 else if (C.isCLibraryFunction(FDecl, "bcopy")) 737 ArgNum = 2; 738 } 739 740 if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 741 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C)) 742 return true; 743 744 return false; 745 } 746 747 void ento::registerGenericTaintChecker(CheckerManager &mgr) { 748 mgr.registerChecker<GenericTaintChecker>(); 749 } 750