1 //=== IteratorsChecker.cpp - Check for Invalidated Iterators ------*- C++ -*---- 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This defines IteratorsChecker, a number of small checks for conditions 11 // leading to invalid iterators being used. 12 // FIXME: Currently only supports 'vector' and 'deque' 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "clang/AST/DeclTemplate.h" 17 #include "clang/Basic/SourceManager.h" 18 #include "ClangSACheckers.h" 19 #include "clang/StaticAnalyzer/Core/Checker.h" 20 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 21 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 24 #include "clang/AST/DeclCXX.h" 25 #include "clang/AST/Decl.h" 26 #include "clang/AST/Type.h" 27 #include "clang/AST/PrettyPrinter.h" 28 #include "llvm/ADT/SmallPtrSet.h" 29 #include "llvm/ADT/StringSwitch.h" 30 31 32 using namespace clang; 33 using namespace ento; 34 35 // This is the state associated with each iterator which includes both the 36 // kind of state and the instance used to initialize it. 37 // FIXME: add location where invalidated for better error reporting. 38 namespace { 39 class RefState { 40 enum Kind { BeginValid, EndValid, Invalid, Undefined, Unknown } K; 41 const void *VR; 42 43 public: 44 RefState(Kind k, const void *vr) : K(k), VR(vr) {} 45 46 bool isValid() const { return K == BeginValid || K == EndValid; } 47 bool isInvalid() const { return K == Invalid; } 48 bool isUndefined() const { return K == Undefined; } 49 bool isUnknown() const { return K == Unknown; } 50 const MemRegion *getMemRegion() const { 51 if (K == BeginValid || K == EndValid) 52 return(const MemRegion *)VR; 53 return 0; 54 } 55 const MemberExpr *getMemberExpr() const { 56 if (K == Invalid) 57 return(const MemberExpr *)VR; 58 return 0; 59 } 60 61 bool operator==(const RefState &X) const { 62 return K == X.K && VR == X.VR; 63 } 64 65 static RefState getBeginValid(const MemRegion *vr) { 66 assert(vr); 67 return RefState(BeginValid, vr); 68 } 69 static RefState getEndValid(const MemRegion *vr) { 70 assert(vr); 71 return RefState(EndValid, vr); 72 } 73 static RefState getInvalid( const MemberExpr *ME ) { 74 return RefState(Invalid, ME); 75 } 76 static RefState getUndefined( void ) { 77 return RefState(Undefined, 0); 78 } 79 static RefState getUnknown( void ) { 80 return RefState(Unknown, 0); 81 } 82 83 void Profile(llvm::FoldingSetNodeID &ID) const { 84 ID.AddInteger(K); 85 ID.AddPointer(VR); 86 } 87 }; 88 89 enum RefKind { NoKind, VectorKind, VectorIteratorKind }; 90 91 class IteratorsChecker : 92 public Checker<check::PreStmt<CXXOperatorCallExpr>, 93 check::PreStmt<DeclStmt>, 94 check::PreStmt<CXXMemberCallExpr>, 95 check::PreStmt<CallExpr> > 96 { 97 // Used when parsing iterators and vectors and deques. 98 BuiltinBug *BT_Invalid, *BT_Undefined, *BT_Incompatible; 99 100 public: 101 IteratorsChecker() : 102 BT_Invalid(0), BT_Undefined(0), BT_Incompatible(0) 103 {} 104 static void *getTag() { static int tag; return &tag; } 105 106 // Checker entry points. 107 void checkPreStmt(const CXXOperatorCallExpr *OCE, 108 CheckerContext &C) const; 109 110 void checkPreStmt(const DeclStmt *DS, 111 CheckerContext &C) const; 112 113 void checkPreStmt(const CXXMemberCallExpr *MCE, 114 CheckerContext &C) const; 115 116 void checkPreStmt(const CallExpr *CE, 117 CheckerContext &C) const; 118 119 private: 120 const ProgramState *handleAssign(const ProgramState *state, 121 const Expr *lexp, 122 const Expr *rexp, 123 const LocationContext *LC) const; 124 125 const ProgramState *handleAssign(const ProgramState *state, 126 const MemRegion *MR, 127 const Expr *rexp, 128 const LocationContext *LC) const; 129 130 const ProgramState *invalidateIterators(const ProgramState *state, 131 const MemRegion *MR, 132 const MemberExpr *ME) const; 133 134 void checkExpr(CheckerContext &C, const Expr *E) const; 135 136 void checkArgs(CheckerContext &C, const CallExpr *CE) const; 137 138 const MemRegion *getRegion(const ProgramState *state, 139 const Expr *E, 140 const LocationContext *LC) const; 141 142 const DeclRefExpr *getDeclRefExpr(const Expr *E) const; 143 }; 144 145 class IteratorState { 146 public: 147 typedef llvm::ImmutableMap<const MemRegion *, RefState> EntryMap; 148 }; 149 } //end anonymous namespace 150 151 namespace clang { 152 namespace ento { 153 template <> 154 struct ProgramStateTrait<IteratorState> 155 : public ProgramStatePartialTrait<IteratorState::EntryMap> { 156 static void *GDMIndex() { return IteratorsChecker::getTag(); } 157 }; 158 } 159 } 160 161 void ento::registerIteratorsChecker(CheckerManager &mgr) { 162 mgr.registerChecker<IteratorsChecker>(); 163 } 164 165 // =============================================== 166 // Utility functions used by visitor functions 167 // =============================================== 168 169 // check a templated type for std::vector or std::deque 170 static RefKind getTemplateKind(const NamedDecl *td) { 171 const DeclContext *dc = td->getDeclContext(); 172 const NamespaceDecl *nameSpace = dyn_cast<NamespaceDecl>(dc); 173 if (!nameSpace || !isa<TranslationUnitDecl>(nameSpace->getDeclContext()) 174 || nameSpace->getName() != "std") 175 return NoKind; 176 177 StringRef name = td->getName(); 178 return llvm::StringSwitch<RefKind>(name) 179 .Cases("vector", "deque", VectorKind) 180 .Default(NoKind); 181 } 182 183 static RefKind getTemplateKind(const DeclContext *dc) { 184 if (const ClassTemplateSpecializationDecl *td = 185 dyn_cast<ClassTemplateSpecializationDecl>(dc)) 186 return getTemplateKind(cast<NamedDecl>(td)); 187 return NoKind; 188 } 189 190 static RefKind getTemplateKind(const TypedefType *tdt) { 191 const TypedefNameDecl *td = tdt->getDecl(); 192 RefKind parentKind = getTemplateKind(td->getDeclContext()); 193 if (parentKind == VectorKind) { 194 return llvm::StringSwitch<RefKind>(td->getName()) 195 .Cases("iterator", 196 "const_iterator", 197 "reverse_iterator", VectorIteratorKind) 198 .Default(NoKind); 199 } 200 return NoKind; 201 } 202 203 static RefKind getTemplateKind(const TemplateSpecializationType *tsp) { 204 const TemplateName &tname = tsp->getTemplateName(); 205 TemplateDecl *td = tname.getAsTemplateDecl(); 206 if (!td) 207 return NoKind; 208 return getTemplateKind(td); 209 } 210 211 static RefKind getTemplateKind(QualType T) { 212 if (const TemplateSpecializationType *tsp = 213 T->getAs<TemplateSpecializationType>()) { 214 return getTemplateKind(tsp); 215 } 216 if (const ElaboratedType *ET = dyn_cast<ElaboratedType>(T)) { 217 QualType namedType = ET->getNamedType(); 218 if (const TypedefType *tdt = namedType->getAs<TypedefType>()) 219 return getTemplateKind(tdt); 220 if (const TemplateSpecializationType *tsp = 221 namedType->getAs<TemplateSpecializationType>()) { 222 return getTemplateKind(tsp); 223 } 224 } 225 return NoKind; 226 } 227 228 // Iterate through our map and invalidate any iterators that were 229 // initialized fromt the specified instance MemRegion. 230 const ProgramState *IteratorsChecker::invalidateIterators(const ProgramState *state, 231 const MemRegion *MR, const MemberExpr *ME) const { 232 IteratorState::EntryMap Map = state->get<IteratorState>(); 233 if (Map.isEmpty()) 234 return state; 235 236 // Loop over the entries in the current state. 237 // The key doesn't change, so the map iterators won't change. 238 for (IteratorState::EntryMap::iterator I = Map.begin(), E = Map.end(); 239 I != E; ++I) { 240 RefState RS = I.getData(); 241 if (RS.getMemRegion() == MR) 242 state = state->set<IteratorState>(I.getKey(), RefState::getInvalid(ME)); 243 } 244 245 return state; 246 } 247 248 // Handle assigning to an iterator where we don't have the LValue MemRegion. 249 const ProgramState *IteratorsChecker::handleAssign(const ProgramState *state, 250 const Expr *lexp, const Expr *rexp, const LocationContext *LC) const { 251 // Skip the cast if present. 252 if (const MaterializeTemporaryExpr *M 253 = dyn_cast<MaterializeTemporaryExpr>(lexp)) 254 lexp = M->GetTemporaryExpr(); 255 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(lexp)) 256 lexp = ICE->getSubExpr(); 257 SVal sv = state->getSVal(lexp); 258 const MemRegion *MR = sv.getAsRegion(); 259 if (!MR) 260 return state; 261 RefKind kind = getTemplateKind(lexp->getType()); 262 263 // If assigning to a vector, invalidate any iterators currently associated. 264 if (kind == VectorKind) 265 return invalidateIterators(state, MR, 0); 266 267 // Make sure that we are assigning to an iterator. 268 if (getTemplateKind(lexp->getType()) != VectorIteratorKind) 269 return state; 270 return handleAssign(state, MR, rexp, LC); 271 } 272 273 // handle assigning to an iterator 274 const ProgramState *IteratorsChecker::handleAssign(const ProgramState *state, 275 const MemRegion *MR, const Expr *rexp, const LocationContext *LC) const { 276 // Assume unknown until we find something definite. 277 state = state->set<IteratorState>(MR, RefState::getUnknown()); 278 if (const MaterializeTemporaryExpr *M 279 = dyn_cast<MaterializeTemporaryExpr>(rexp)) 280 rexp = M->GetTemporaryExpr(); 281 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(rexp)) 282 rexp = ICE->getSubExpr(); 283 // Need to handle three cases: MemberCall, copy, copy with addition. 284 if (const CallExpr *CE = dyn_cast<CallExpr>(rexp)) { 285 // Handle MemberCall. 286 if (const MemberExpr *ME = dyn_cast<MemberExpr>(CE->getCallee())) { 287 const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ME->getBase()); 288 if (!DRE) 289 return state; 290 // Verify that the type is std::vector<T>. 291 if (getTemplateKind(DRE->getType()) != VectorKind) 292 return state; 293 // Now get the MemRegion associated with the instance. 294 const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()); 295 if (!VD) 296 return state; 297 const MemRegion *IMR = state->getRegion(VD, LC); 298 if (!IMR) 299 return state; 300 // Finally, see if it is one of the calls that will create 301 // a valid iterator and mark it if so, else mark as Unknown. 302 StringRef mName = ME->getMemberDecl()->getName(); 303 304 if (llvm::StringSwitch<bool>(mName) 305 .Cases("begin", "insert", "erase", true).Default(false)) { 306 return state->set<IteratorState>(MR, RefState::getBeginValid(IMR)); 307 } 308 if (mName == "end") 309 return state->set<IteratorState>(MR, RefState::getEndValid(IMR)); 310 311 return state->set<IteratorState>(MR, RefState::getUnknown()); 312 } 313 } 314 // Handle straight copy from another iterator. 315 if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(rexp)) { 316 if (getTemplateKind(DRE->getType()) != VectorIteratorKind) 317 return state; 318 // Now get the MemRegion associated with the instance. 319 const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()); 320 if (!VD) 321 return state; 322 const MemRegion *IMR = state->getRegion(VD, LC); 323 if (!IMR) 324 return state; 325 // Get the RefState of the iterator being copied. 326 const RefState *RS = state->get<IteratorState>(IMR); 327 if (!RS) 328 return state; 329 // Use it to set the state of the LValue. 330 return state->set<IteratorState>(MR, *RS); 331 } 332 // If we have operator+ or operator- ... 333 if (const CXXOperatorCallExpr *OCE = dyn_cast<CXXOperatorCallExpr>(rexp)) { 334 OverloadedOperatorKind Kind = OCE->getOperator(); 335 if (Kind == OO_Plus || Kind == OO_Minus) { 336 // Check left side of tree for a valid value. 337 state = handleAssign( state, MR, OCE->getArg(0), LC); 338 const RefState *RS = state->get<IteratorState>(MR); 339 // If found, return it. 340 if (!RS->isUnknown()) 341 return state; 342 // Otherwise return what we find in the right side. 343 return handleAssign(state, MR, OCE->getArg(1), LC); 344 } 345 } 346 // Fall through if nothing matched. 347 return state; 348 } 349 350 // Iterate through the arguments looking for an Invalid or Undefined iterator. 351 void IteratorsChecker::checkArgs(CheckerContext &C, const CallExpr *CE) const { 352 for (CallExpr::const_arg_iterator I = CE->arg_begin(), E = CE->arg_end(); 353 I != E; ++I) { 354 checkExpr(C, *I); 355 } 356 } 357 358 // Get the DeclRefExpr associated with the expression. 359 const DeclRefExpr *IteratorsChecker::getDeclRefExpr(const Expr *E) const { 360 // If it is a CXXConstructExpr, need to get the subexpression. 361 if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(E)) { 362 if (CE->getNumArgs()== 1) { 363 CXXConstructorDecl *CD = CE->getConstructor(); 364 if (CD->isTrivial()) 365 E = CE->getArg(0); 366 } 367 } 368 if (const MaterializeTemporaryExpr *M = dyn_cast<MaterializeTemporaryExpr>(E)) 369 E = M->GetTemporaryExpr(); 370 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(E)) 371 E = ICE->getSubExpr(); 372 // If it isn't one of our types, don't do anything. 373 if (getTemplateKind(E->getType()) != VectorIteratorKind) 374 return NULL; 375 return dyn_cast<DeclRefExpr>(E); 376 } 377 378 // Get the MemRegion associated with the expresssion. 379 const MemRegion *IteratorsChecker::getRegion(const ProgramState *state, 380 const Expr *E, const LocationContext *LC) const { 381 const DeclRefExpr *DRE = getDeclRefExpr(E); 382 if (!DRE) 383 return NULL; 384 const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()); 385 if (!VD) 386 return NULL; 387 // return the MemRegion associated with the iterator 388 return state->getRegion(VD, LC); 389 } 390 391 // Check the expression and if it is an iterator, generate a diagnostic 392 // if the iterator is not valid. 393 // FIXME: this method can generate new nodes, and subsequent logic should 394 // use those nodes. We also cannot create multiple nodes at one ProgramPoint 395 // with the same tag. 396 void IteratorsChecker::checkExpr(CheckerContext &C, const Expr *E) const { 397 const ProgramState *state = C.getState(); 398 const MemRegion *MR = getRegion(state, E, 399 C.getPredecessor()->getLocationContext()); 400 if (!MR) 401 return; 402 403 // Get the state associated with the iterator. 404 const RefState *RS = state->get<IteratorState>(MR); 405 if (!RS) 406 return; 407 if (RS->isInvalid()) { 408 if (ExplodedNode *N = C.generateNode()) { 409 if (!BT_Invalid) 410 // FIXME: We are eluding constness here. 411 const_cast<IteratorsChecker*>(this)->BT_Invalid = new BuiltinBug(""); 412 413 std::string msg; 414 const MemberExpr *ME = RS->getMemberExpr(); 415 if (ME) { 416 std::string name = ME->getMemberNameInfo().getAsString(); 417 msg = "Attempt to use an iterator made invalid by call to '" + 418 name + "'"; 419 } 420 else { 421 msg = "Attempt to use an iterator made invalid by copying another " 422 "container to its container"; 423 } 424 425 BugReport *R = new BugReport(*BT_Invalid, msg, N); 426 R->addRange(getDeclRefExpr(E)->getSourceRange()); 427 C.EmitReport(R); 428 } 429 } 430 else if (RS->isUndefined()) { 431 if (ExplodedNode *N = C.generateNode()) { 432 if (!BT_Undefined) 433 // FIXME: We are eluding constness here. 434 const_cast<IteratorsChecker*>(this)->BT_Undefined = 435 new BuiltinBug("Use of iterator that is not defined"); 436 437 BugReport *R = new BugReport(*BT_Undefined, 438 BT_Undefined->getDescription(), N); 439 R->addRange(getDeclRefExpr(E)->getSourceRange()); 440 C.EmitReport(R); 441 } 442 } 443 } 444 445 // =============================================== 446 // Path analysis visitor functions 447 // =============================================== 448 449 // For a generic Call, just check the args for bad iterators. 450 void IteratorsChecker::checkPreStmt(const CallExpr *CE, 451 CheckerContext &C) const{ 452 453 // FIXME: These checks are to currently work around a bug 454 // in CheckerManager. 455 if (isa<CXXOperatorCallExpr>(CE)) 456 return; 457 if (isa<CXXMemberCallExpr>(CE)) 458 return; 459 460 checkArgs(C, CE); 461 } 462 463 // Handle operator calls. First, if it is operator=, check the argument, 464 // and handle assigning and set target state appropriately. Otherwise, for 465 // other operators, check the args for bad iterators and handle comparisons. 466 void IteratorsChecker::checkPreStmt(const CXXOperatorCallExpr *OCE, 467 CheckerContext &C) const 468 { 469 const LocationContext *LC = C.getPredecessor()->getLocationContext(); 470 const ProgramState *state = C.getState(); 471 OverloadedOperatorKind Kind = OCE->getOperator(); 472 if (Kind == OO_Equal) { 473 checkExpr(C, OCE->getArg(1)); 474 state = handleAssign(state, OCE->getArg(0), OCE->getArg(1), LC); 475 C.addTransition(state); 476 return; 477 } 478 else { 479 checkArgs(C, OCE); 480 // If it is a compare and both are iterators, ensure that they are for 481 // the same container. 482 if (Kind == OO_EqualEqual || Kind == OO_ExclaimEqual || 483 Kind == OO_Less || Kind == OO_LessEqual || 484 Kind == OO_Greater || Kind == OO_GreaterEqual) { 485 const MemRegion *MR0, *MR1; 486 MR0 = getRegion(state, OCE->getArg(0), LC); 487 if (!MR0) 488 return; 489 MR1 = getRegion(state, OCE->getArg(1), LC); 490 if (!MR1) 491 return; 492 const RefState *RS0, *RS1; 493 RS0 = state->get<IteratorState>(MR0); 494 if (!RS0) 495 return; 496 RS1 = state->get<IteratorState>(MR1); 497 if (!RS1) 498 return; 499 if (RS0->getMemRegion() != RS1->getMemRegion()) { 500 if (ExplodedNode *N = C.generateNode()) { 501 if (!BT_Incompatible) 502 const_cast<IteratorsChecker*>(this)->BT_Incompatible = 503 new BuiltinBug( 504 "Cannot compare iterators from different containers"); 505 506 BugReport *R = new BugReport(*BT_Incompatible, 507 BT_Incompatible->getDescription(), N); 508 R->addRange(OCE->getSourceRange()); 509 C.EmitReport(R); 510 } 511 } 512 } 513 } 514 } 515 516 // Need to handle DeclStmts to pick up initializing of iterators and to mark 517 // uninitialized ones as Undefined. 518 void IteratorsChecker::checkPreStmt(const DeclStmt *DS, 519 CheckerContext &C) const { 520 const Decl *D = *DS->decl_begin(); 521 const VarDecl *VD = dyn_cast<VarDecl>(D); 522 // Only care about iterators. 523 if (getTemplateKind(VD->getType()) != VectorIteratorKind) 524 return; 525 526 // Get the MemRegion associated with the iterator and mark it as Undefined. 527 const ProgramState *state = C.getState(); 528 Loc VarLoc = state->getLValue(VD, C.getPredecessor()->getLocationContext()); 529 const MemRegion *MR = VarLoc.getAsRegion(); 530 if (!MR) 531 return; 532 state = state->set<IteratorState>(MR, RefState::getUndefined()); 533 534 // if there is an initializer, handle marking Valid if a proper initializer 535 const Expr *InitEx = VD->getInit(); 536 if (InitEx) { 537 // FIXME: This is too syntactic. Since 'InitEx' will be analyzed first 538 // it should resolve to an SVal that we can check for validity 539 // *semantically* instead of walking through the AST. 540 if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(InitEx)) { 541 if (CE->getNumArgs() == 1) { 542 const Expr *E = CE->getArg(0); 543 if (const MaterializeTemporaryExpr *M 544 = dyn_cast<MaterializeTemporaryExpr>(E)) 545 E = M->GetTemporaryExpr(); 546 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(E)) 547 InitEx = ICE->getSubExpr(); 548 state = handleAssign(state, MR, InitEx, 549 C.getPredecessor()->getLocationContext()); 550 } 551 } 552 } 553 C.addTransition(state); 554 } 555 556 557 namespace { struct CalledReserved {}; } 558 namespace clang { namespace ento { 559 template<> struct ProgramStateTrait<CalledReserved> 560 : public ProgramStatePartialTrait<llvm::ImmutableSet<const MemRegion*> > { 561 static void *GDMIndex() { static int index = 0; return &index; } 562 }; 563 }} 564 565 // on a member call, first check the args for any bad iterators 566 // then, check to see if it is a call to a function that will invalidate 567 // the iterators 568 void IteratorsChecker::checkPreStmt(const CXXMemberCallExpr *MCE, 569 CheckerContext &C) const { 570 // Check the arguments. 571 checkArgs(C, MCE); 572 const MemberExpr *ME = dyn_cast<MemberExpr>(MCE->getCallee()); 573 if (!ME) 574 return; 575 // Make sure we have the right kind of container. 576 const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ME->getBase()); 577 if (!DRE || getTemplateKind(DRE->getType()) != VectorKind) 578 return; 579 SVal tsv = C.getState()->getSVal(DRE); 580 // Get the MemRegion associated with the container instance. 581 const MemRegion *MR = tsv.getAsRegion(); 582 if (!MR) 583 return; 584 // If we are calling a function that invalidates iterators, mark them 585 // appropriately by finding matching instances. 586 const ProgramState *state = C.getState(); 587 StringRef mName = ME->getMemberDecl()->getName(); 588 if (llvm::StringSwitch<bool>(mName) 589 .Cases("insert", "reserve", "push_back", true) 590 .Cases("erase", "pop_back", "clear", "resize", true) 591 .Default(false)) { 592 // If there was a 'reserve' call, assume iterators are good. 593 if (!state->contains<CalledReserved>(MR)) 594 state = invalidateIterators(state, MR, ME); 595 } 596 // Keep track of instances that have called 'reserve' 597 // note: do this after we invalidate any iterators by calling 598 // 'reserve' itself. 599 if (mName == "reserve") 600 state = state->add<CalledReserved>(MR); 601 602 if (state != C.getState()) 603 C.addTransition(state); 604 } 605 606