1 //=== IteratorsChecker.cpp - Check for Invalidated Iterators ------*- C++ -*---- 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This defines IteratorsChecker, a number of small checks for conditions 11 // leading to invalid iterators being used. 12 // FIXME: Currently only supports 'vector' and 'deque' 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "clang/AST/DeclTemplate.h" 17 #include "clang/Basic/SourceManager.h" 18 #include "ClangSACheckers.h" 19 #include "clang/StaticAnalyzer/Core/Checker.h" 20 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 21 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 24 #include "clang/AST/DeclCXX.h" 25 #include "clang/AST/ExprCXX.h" 26 #include "clang/AST/Type.h" 27 #include "clang/AST/PrettyPrinter.h" 28 #include "llvm/ADT/SmallPtrSet.h" 29 #include "llvm/ADT/StringSwitch.h" 30 31 32 using namespace clang; 33 using namespace ento; 34 35 // This is the state associated with each iterator which includes both the 36 // kind of state and the instance used to initialize it. 37 // FIXME: add location where invalidated for better error reporting. 38 namespace { 39 class RefState { 40 enum Kind { BeginValid, EndValid, Invalid, Undefined, Unknown } K; 41 const void *VR; 42 43 public: 44 RefState(Kind k, const void *vr) : K(k), VR(vr) {} 45 46 bool isValid() const { return K == BeginValid || K == EndValid; } 47 bool isInvalid() const { return K == Invalid; } 48 bool isUndefined() const { return K == Undefined; } 49 bool isUnknown() const { return K == Unknown; } 50 const MemRegion *getMemRegion() const { 51 if (K == BeginValid || K == EndValid) 52 return(const MemRegion *)VR; 53 return 0; 54 } 55 const MemberExpr *getMemberExpr() const { 56 if (K == Invalid) 57 return(const MemberExpr *)VR; 58 return 0; 59 } 60 61 bool operator==(const RefState &X) const { 62 return K == X.K && VR == X.VR; 63 } 64 65 static RefState getBeginValid(const MemRegion *vr) { 66 assert(vr); 67 return RefState(BeginValid, vr); 68 } 69 static RefState getEndValid(const MemRegion *vr) { 70 assert(vr); 71 return RefState(EndValid, vr); 72 } 73 static RefState getInvalid( const MemberExpr *ME ) { 74 return RefState(Invalid, ME); 75 } 76 static RefState getUndefined( void ) { 77 return RefState(Undefined, 0); 78 } 79 static RefState getUnknown( void ) { 80 return RefState(Unknown, 0); 81 } 82 83 void Profile(llvm::FoldingSetNodeID &ID) const { 84 ID.AddInteger(K); 85 ID.AddPointer(VR); 86 } 87 }; 88 89 enum RefKind { NoKind, VectorKind, VectorIteratorKind }; 90 91 class IteratorsChecker : 92 public Checker<check::PreStmt<CXXOperatorCallExpr>, 93 check::PreStmt<DeclStmt>, 94 check::PreStmt<CXXMemberCallExpr>, 95 check::PreStmt<CallExpr> > 96 { 97 // Used when parsing iterators and vectors and deques. 98 BuiltinBug *BT_Invalid, *BT_Undefined, *BT_Incompatible; 99 100 public: 101 IteratorsChecker() : 102 BT_Invalid(0), BT_Undefined(0), BT_Incompatible(0) 103 {} 104 static void *getTag() { static int tag; return &tag; } 105 106 // Checker entry points. 107 void checkPreStmt(const CXXOperatorCallExpr *OCE, 108 CheckerContext &C) const; 109 110 void checkPreStmt(const DeclStmt *DS, 111 CheckerContext &C) const; 112 113 void checkPreStmt(const CXXMemberCallExpr *MCE, 114 CheckerContext &C) const; 115 116 void checkPreStmt(const CallExpr *CE, 117 CheckerContext &C) const; 118 119 private: 120 ProgramStateRef handleAssign(ProgramStateRef state, 121 const Expr *lexp, 122 const Expr *rexp, 123 const LocationContext *LC) const; 124 125 ProgramStateRef handleAssign(ProgramStateRef state, 126 const MemRegion *MR, 127 const Expr *rexp, 128 const LocationContext *LC) const; 129 130 ProgramStateRef invalidateIterators(ProgramStateRef state, 131 const MemRegion *MR, 132 const MemberExpr *ME) const; 133 134 void checkExpr(CheckerContext &C, const Expr *E) const; 135 136 void checkArgs(CheckerContext &C, const CallExpr *CE) const; 137 138 const MemRegion *getRegion(ProgramStateRef state, 139 const Expr *E, 140 const LocationContext *LC) const; 141 142 const DeclRefExpr *getDeclRefExpr(const Expr *E) const; 143 }; 144 145 class IteratorState { 146 public: 147 typedef llvm::ImmutableMap<const MemRegion *, RefState> EntryMap; 148 }; 149 } //end anonymous namespace 150 151 namespace clang { 152 namespace ento { 153 template <> 154 struct ProgramStateTrait<IteratorState> 155 : public ProgramStatePartialTrait<IteratorState::EntryMap> { 156 static void *GDMIndex() { return IteratorsChecker::getTag(); } 157 }; 158 } 159 } 160 161 void ento::registerIteratorsChecker(CheckerManager &mgr) { 162 mgr.registerChecker<IteratorsChecker>(); 163 } 164 165 // =============================================== 166 // Utility functions used by visitor functions 167 // =============================================== 168 169 // check a templated type for std::vector or std::deque 170 static RefKind getTemplateKind(const NamedDecl *td) { 171 const DeclContext *dc = td->getDeclContext(); 172 const NamespaceDecl *nameSpace = dyn_cast<NamespaceDecl>(dc); 173 if (!nameSpace || !isa<TranslationUnitDecl>(nameSpace->getDeclContext()) 174 || nameSpace->getName() != "std") 175 return NoKind; 176 177 StringRef name = td->getName(); 178 return llvm::StringSwitch<RefKind>(name) 179 .Cases("vector", "deque", VectorKind) 180 .Default(NoKind); 181 } 182 183 static RefKind getTemplateKind(const DeclContext *dc) { 184 if (const ClassTemplateSpecializationDecl *td = 185 dyn_cast<ClassTemplateSpecializationDecl>(dc)) 186 return getTemplateKind(cast<NamedDecl>(td)); 187 return NoKind; 188 } 189 190 static RefKind getTemplateKind(const TypedefType *tdt) { 191 const TypedefNameDecl *td = tdt->getDecl(); 192 RefKind parentKind = getTemplateKind(td->getDeclContext()); 193 if (parentKind == VectorKind) { 194 return llvm::StringSwitch<RefKind>(td->getName()) 195 .Cases("iterator", 196 "const_iterator", 197 "reverse_iterator", VectorIteratorKind) 198 .Default(NoKind); 199 } 200 return NoKind; 201 } 202 203 static RefKind getTemplateKind(const TemplateSpecializationType *tsp) { 204 const TemplateName &tname = tsp->getTemplateName(); 205 TemplateDecl *td = tname.getAsTemplateDecl(); 206 if (!td) 207 return NoKind; 208 return getTemplateKind(td); 209 } 210 211 static RefKind getTemplateKind(QualType T) { 212 if (const TemplateSpecializationType *tsp = 213 T->getAs<TemplateSpecializationType>()) { 214 return getTemplateKind(tsp); 215 } 216 if (const ElaboratedType *ET = dyn_cast<ElaboratedType>(T)) { 217 QualType namedType = ET->getNamedType(); 218 if (const TypedefType *tdt = namedType->getAs<TypedefType>()) 219 return getTemplateKind(tdt); 220 if (const TemplateSpecializationType *tsp = 221 namedType->getAs<TemplateSpecializationType>()) { 222 return getTemplateKind(tsp); 223 } 224 } 225 return NoKind; 226 } 227 228 // Iterate through our map and invalidate any iterators that were 229 // initialized fromt the specified instance MemRegion. 230 ProgramStateRef IteratorsChecker::invalidateIterators(ProgramStateRef state, 231 const MemRegion *MR, const MemberExpr *ME) const { 232 IteratorState::EntryMap Map = state->get<IteratorState>(); 233 if (Map.isEmpty()) 234 return state; 235 236 // Loop over the entries in the current state. 237 // The key doesn't change, so the map iterators won't change. 238 for (IteratorState::EntryMap::iterator I = Map.begin(), E = Map.end(); 239 I != E; ++I) { 240 RefState RS = I.getData(); 241 if (RS.getMemRegion() == MR) 242 state = state->set<IteratorState>(I.getKey(), RefState::getInvalid(ME)); 243 } 244 245 return state; 246 } 247 248 // Handle assigning to an iterator where we don't have the LValue MemRegion. 249 ProgramStateRef IteratorsChecker::handleAssign(ProgramStateRef state, 250 const Expr *lexp, const Expr *rexp, const LocationContext *LC) const { 251 // Skip the cast if present. 252 if (const MaterializeTemporaryExpr *M 253 = dyn_cast<MaterializeTemporaryExpr>(lexp)) 254 lexp = M->GetTemporaryExpr(); 255 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(lexp)) 256 lexp = ICE->getSubExpr(); 257 SVal sv = state->getSVal(lexp, LC); 258 const MemRegion *MR = sv.getAsRegion(); 259 if (!MR) 260 return state; 261 RefKind kind = getTemplateKind(lexp->getType()); 262 263 // If assigning to a vector, invalidate any iterators currently associated. 264 if (kind == VectorKind) 265 return invalidateIterators(state, MR, 0); 266 267 // Make sure that we are assigning to an iterator. 268 if (getTemplateKind(lexp->getType()) != VectorIteratorKind) 269 return state; 270 return handleAssign(state, MR, rexp, LC); 271 } 272 273 // handle assigning to an iterator 274 ProgramStateRef IteratorsChecker::handleAssign(ProgramStateRef state, 275 const MemRegion *MR, const Expr *rexp, const LocationContext *LC) const { 276 // Assume unknown until we find something definite. 277 state = state->set<IteratorState>(MR, RefState::getUnknown()); 278 if (const MaterializeTemporaryExpr *M 279 = dyn_cast<MaterializeTemporaryExpr>(rexp)) 280 rexp = M->GetTemporaryExpr(); 281 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(rexp)) 282 rexp = ICE->getSubExpr(); 283 // Need to handle three cases: MemberCall, copy, copy with addition. 284 if (const CallExpr *CE = dyn_cast<CallExpr>(rexp)) { 285 // Handle MemberCall. 286 if (const MemberExpr *ME = dyn_cast<MemberExpr>(CE->getCallee())) { 287 const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ME->getBase()); 288 if (!DRE) 289 return state; 290 // Verify that the type is std::vector<T>. 291 if (getTemplateKind(DRE->getType()) != VectorKind) 292 return state; 293 // Now get the MemRegion associated with the instance. 294 const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()); 295 if (!VD) 296 return state; 297 const MemRegion *IMR = state->getRegion(VD, LC); 298 if (!IMR) 299 return state; 300 // Finally, see if it is one of the calls that will create 301 // a valid iterator and mark it if so, else mark as Unknown. 302 StringRef mName = ME->getMemberDecl()->getName(); 303 304 if (llvm::StringSwitch<bool>(mName) 305 .Cases("begin", "insert", "erase", true).Default(false)) { 306 return state->set<IteratorState>(MR, RefState::getBeginValid(IMR)); 307 } 308 if (mName == "end") 309 return state->set<IteratorState>(MR, RefState::getEndValid(IMR)); 310 311 return state->set<IteratorState>(MR, RefState::getUnknown()); 312 } 313 } 314 // Handle straight copy from another iterator. 315 if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(rexp)) { 316 if (getTemplateKind(DRE->getType()) != VectorIteratorKind) 317 return state; 318 // Now get the MemRegion associated with the instance. 319 const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()); 320 if (!VD) 321 return state; 322 const MemRegion *IMR = state->getRegion(VD, LC); 323 if (!IMR) 324 return state; 325 // Get the RefState of the iterator being copied. 326 const RefState *RS = state->get<IteratorState>(IMR); 327 if (!RS) 328 return state; 329 // Use it to set the state of the LValue. 330 return state->set<IteratorState>(MR, *RS); 331 } 332 // If we have operator+ or operator- ... 333 if (const CXXOperatorCallExpr *OCE = dyn_cast<CXXOperatorCallExpr>(rexp)) { 334 OverloadedOperatorKind Kind = OCE->getOperator(); 335 if (Kind == OO_Plus || Kind == OO_Minus) { 336 // Check left side of tree for a valid value. 337 state = handleAssign( state, MR, OCE->getArg(0), LC); 338 const RefState *RS = state->get<IteratorState>(MR); 339 // If found, return it. 340 if (!RS->isUnknown()) 341 return state; 342 // Otherwise return what we find in the right side. 343 return handleAssign(state, MR, OCE->getArg(1), LC); 344 } 345 } 346 // Fall through if nothing matched. 347 return state; 348 } 349 350 // Iterate through the arguments looking for an Invalid or Undefined iterator. 351 void IteratorsChecker::checkArgs(CheckerContext &C, const CallExpr *CE) const { 352 for (CallExpr::const_arg_iterator I = CE->arg_begin(), E = CE->arg_end(); 353 I != E; ++I) { 354 checkExpr(C, *I); 355 } 356 } 357 358 // Get the DeclRefExpr associated with the expression. 359 const DeclRefExpr *IteratorsChecker::getDeclRefExpr(const Expr *E) const { 360 // If it is a CXXConstructExpr, need to get the subexpression. 361 if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(E)) { 362 if (CE->getNumArgs()== 1) { 363 CXXConstructorDecl *CD = CE->getConstructor(); 364 if (CD->isTrivial()) 365 E = CE->getArg(0); 366 } 367 } 368 if (const MaterializeTemporaryExpr *M = dyn_cast<MaterializeTemporaryExpr>(E)) 369 E = M->GetTemporaryExpr(); 370 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(E)) 371 E = ICE->getSubExpr(); 372 // If it isn't one of our types, don't do anything. 373 if (getTemplateKind(E->getType()) != VectorIteratorKind) 374 return NULL; 375 return dyn_cast<DeclRefExpr>(E); 376 } 377 378 // Get the MemRegion associated with the expresssion. 379 const MemRegion *IteratorsChecker::getRegion(ProgramStateRef state, 380 const Expr *E, const LocationContext *LC) const { 381 const DeclRefExpr *DRE = getDeclRefExpr(E); 382 if (!DRE) 383 return NULL; 384 const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()); 385 if (!VD) 386 return NULL; 387 // return the MemRegion associated with the iterator 388 return state->getRegion(VD, LC); 389 } 390 391 // Check the expression and if it is an iterator, generate a diagnostic 392 // if the iterator is not valid. 393 // FIXME: this method can generate new nodes, and subsequent logic should 394 // use those nodes. We also cannot create multiple nodes at one ProgramPoint 395 // with the same tag. 396 void IteratorsChecker::checkExpr(CheckerContext &C, const Expr *E) const { 397 ProgramStateRef state = C.getState(); 398 const MemRegion *MR = getRegion(state, E, C.getLocationContext()); 399 if (!MR) 400 return; 401 402 // Get the state associated with the iterator. 403 const RefState *RS = state->get<IteratorState>(MR); 404 if (!RS) 405 return; 406 if (RS->isInvalid()) { 407 if (ExplodedNode *N = C.addTransition()) { 408 if (!BT_Invalid) 409 // FIXME: We are eluding constness here. 410 const_cast<IteratorsChecker*>(this)->BT_Invalid = new BuiltinBug(""); 411 412 std::string msg; 413 const MemberExpr *ME = RS->getMemberExpr(); 414 if (ME) { 415 std::string name = ME->getMemberNameInfo().getAsString(); 416 msg = "Attempt to use an iterator made invalid by call to '" + 417 name + "'"; 418 } 419 else { 420 msg = "Attempt to use an iterator made invalid by copying another " 421 "container to its container"; 422 } 423 424 BugReport *R = new BugReport(*BT_Invalid, msg, N); 425 R->addRange(getDeclRefExpr(E)->getSourceRange()); 426 C.EmitReport(R); 427 } 428 } 429 else if (RS->isUndefined()) { 430 if (ExplodedNode *N = C.addTransition()) { 431 if (!BT_Undefined) 432 // FIXME: We are eluding constness here. 433 const_cast<IteratorsChecker*>(this)->BT_Undefined = 434 new BuiltinBug("Use of iterator that is not defined"); 435 436 BugReport *R = new BugReport(*BT_Undefined, 437 BT_Undefined->getDescription(), N); 438 R->addRange(getDeclRefExpr(E)->getSourceRange()); 439 C.EmitReport(R); 440 } 441 } 442 } 443 444 // =============================================== 445 // Path analysis visitor functions 446 // =============================================== 447 448 // For a generic Call, just check the args for bad iterators. 449 void IteratorsChecker::checkPreStmt(const CallExpr *CE, 450 CheckerContext &C) const{ 451 452 // FIXME: These checks are to currently work around a bug 453 // in CheckerManager. 454 if (isa<CXXOperatorCallExpr>(CE)) 455 return; 456 if (isa<CXXMemberCallExpr>(CE)) 457 return; 458 459 checkArgs(C, CE); 460 } 461 462 // Handle operator calls. First, if it is operator=, check the argument, 463 // and handle assigning and set target state appropriately. Otherwise, for 464 // other operators, check the args for bad iterators and handle comparisons. 465 void IteratorsChecker::checkPreStmt(const CXXOperatorCallExpr *OCE, 466 CheckerContext &C) const 467 { 468 const LocationContext *LC = C.getLocationContext(); 469 ProgramStateRef state = C.getState(); 470 OverloadedOperatorKind Kind = OCE->getOperator(); 471 if (Kind == OO_Equal) { 472 checkExpr(C, OCE->getArg(1)); 473 state = handleAssign(state, OCE->getArg(0), OCE->getArg(1), LC); 474 C.addTransition(state); 475 return; 476 } 477 else { 478 checkArgs(C, OCE); 479 // If it is a compare and both are iterators, ensure that they are for 480 // the same container. 481 if (Kind == OO_EqualEqual || Kind == OO_ExclaimEqual || 482 Kind == OO_Less || Kind == OO_LessEqual || 483 Kind == OO_Greater || Kind == OO_GreaterEqual) { 484 const MemRegion *MR0, *MR1; 485 MR0 = getRegion(state, OCE->getArg(0), LC); 486 if (!MR0) 487 return; 488 MR1 = getRegion(state, OCE->getArg(1), LC); 489 if (!MR1) 490 return; 491 const RefState *RS0, *RS1; 492 RS0 = state->get<IteratorState>(MR0); 493 if (!RS0) 494 return; 495 RS1 = state->get<IteratorState>(MR1); 496 if (!RS1) 497 return; 498 if (RS0->getMemRegion() != RS1->getMemRegion()) { 499 if (ExplodedNode *N = C.addTransition()) { 500 if (!BT_Incompatible) 501 const_cast<IteratorsChecker*>(this)->BT_Incompatible = 502 new BuiltinBug( 503 "Cannot compare iterators from different containers"); 504 505 BugReport *R = new BugReport(*BT_Incompatible, 506 BT_Incompatible->getDescription(), N); 507 R->addRange(OCE->getSourceRange()); 508 C.EmitReport(R); 509 } 510 } 511 } 512 } 513 } 514 515 // Need to handle DeclStmts to pick up initializing of iterators and to mark 516 // uninitialized ones as Undefined. 517 void IteratorsChecker::checkPreStmt(const DeclStmt *DS, 518 CheckerContext &C) const { 519 const Decl *D = *DS->decl_begin(); 520 const VarDecl *VD = dyn_cast<VarDecl>(D); 521 // Only care about iterators. 522 if (getTemplateKind(VD->getType()) != VectorIteratorKind) 523 return; 524 525 // Get the MemRegion associated with the iterator and mark it as Undefined. 526 ProgramStateRef state = C.getState(); 527 Loc VarLoc = state->getLValue(VD, C.getLocationContext()); 528 const MemRegion *MR = VarLoc.getAsRegion(); 529 if (!MR) 530 return; 531 state = state->set<IteratorState>(MR, RefState::getUndefined()); 532 533 // if there is an initializer, handle marking Valid if a proper initializer 534 const Expr *InitEx = VD->getInit(); 535 if (InitEx) { 536 // FIXME: This is too syntactic. Since 'InitEx' will be analyzed first 537 // it should resolve to an SVal that we can check for validity 538 // *semantically* instead of walking through the AST. 539 if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(InitEx)) { 540 if (CE->getNumArgs() == 1) { 541 const Expr *E = CE->getArg(0); 542 if (const MaterializeTemporaryExpr *M 543 = dyn_cast<MaterializeTemporaryExpr>(E)) 544 E = M->GetTemporaryExpr(); 545 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(E)) 546 InitEx = ICE->getSubExpr(); 547 state = handleAssign(state, MR, InitEx, C.getLocationContext()); 548 } 549 } 550 } 551 C.addTransition(state); 552 } 553 554 555 namespace { struct CalledReserved {}; } 556 namespace clang { namespace ento { 557 template<> struct ProgramStateTrait<CalledReserved> 558 : public ProgramStatePartialTrait<llvm::ImmutableSet<const MemRegion*> > { 559 static void *GDMIndex() { static int index = 0; return &index; } 560 }; 561 }} 562 563 // on a member call, first check the args for any bad iterators 564 // then, check to see if it is a call to a function that will invalidate 565 // the iterators 566 void IteratorsChecker::checkPreStmt(const CXXMemberCallExpr *MCE, 567 CheckerContext &C) const { 568 // Check the arguments. 569 checkArgs(C, MCE); 570 const MemberExpr *ME = dyn_cast<MemberExpr>(MCE->getCallee()); 571 if (!ME) 572 return; 573 // Make sure we have the right kind of container. 574 const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ME->getBase()); 575 if (!DRE || getTemplateKind(DRE->getType()) != VectorKind) 576 return; 577 SVal tsv = C.getState()->getSVal(DRE, C.getLocationContext()); 578 // Get the MemRegion associated with the container instance. 579 const MemRegion *MR = tsv.getAsRegion(); 580 if (!MR) 581 return; 582 // If we are calling a function that invalidates iterators, mark them 583 // appropriately by finding matching instances. 584 ProgramStateRef state = C.getState(); 585 StringRef mName = ME->getMemberDecl()->getName(); 586 if (llvm::StringSwitch<bool>(mName) 587 .Cases("insert", "reserve", "push_back", true) 588 .Cases("erase", "pop_back", "clear", "resize", true) 589 .Default(false)) { 590 // If there was a 'reserve' call, assume iterators are good. 591 if (!state->contains<CalledReserved>(MR)) 592 state = invalidateIterators(state, MR, ME); 593 } 594 // Keep track of instances that have called 'reserve' 595 // note: do this after we invalidate any iterators by calling 596 // 'reserve' itself. 597 if (mName == "reserve") 598 state = state->add<CalledReserved>(MR); 599 600 if (state != C.getState()) 601 C.addTransition(state); 602 } 603 604