1 //== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines RangeConstraintManager, a class that tracks simple 11 // equality and inequality constraints on symbolic values of ProgramState. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "SimpleConstraintManager.h" 16 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" 17 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" 18 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 19 #include "llvm/ADT/FoldingSet.h" 20 #include "llvm/ADT/ImmutableSet.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/raw_ostream.h" 23 24 using namespace clang; 25 using namespace ento; 26 27 /// A Range represents the closed range [from, to]. The caller must 28 /// guarantee that from <= to. Note that Range is immutable, so as not 29 /// to subvert RangeSet's immutability. 30 namespace { 31 class Range : public std::pair<const llvm::APSInt*, 32 const llvm::APSInt*> { 33 public: 34 Range(const llvm::APSInt &from, const llvm::APSInt &to) 35 : std::pair<const llvm::APSInt*, const llvm::APSInt*>(&from, &to) { 36 assert(from <= to); 37 } 38 bool Includes(const llvm::APSInt &v) const { 39 return *first <= v && v <= *second; 40 } 41 const llvm::APSInt &From() const { 42 return *first; 43 } 44 const llvm::APSInt &To() const { 45 return *second; 46 } 47 const llvm::APSInt *getConcreteValue() const { 48 return &From() == &To() ? &From() : NULL; 49 } 50 51 void Profile(llvm::FoldingSetNodeID &ID) const { 52 ID.AddPointer(&From()); 53 ID.AddPointer(&To()); 54 } 55 }; 56 57 58 class RangeTrait : public llvm::ImutContainerInfo<Range> { 59 public: 60 // When comparing if one Range is less than another, we should compare 61 // the actual APSInt values instead of their pointers. This keeps the order 62 // consistent (instead of comparing by pointer values) and can potentially 63 // be used to speed up some of the operations in RangeSet. 64 static inline bool isLess(key_type_ref lhs, key_type_ref rhs) { 65 return *lhs.first < *rhs.first || (!(*rhs.first < *lhs.first) && 66 *lhs.second < *rhs.second); 67 } 68 }; 69 70 /// RangeSet contains a set of ranges. If the set is empty, then 71 /// there the value of a symbol is overly constrained and there are no 72 /// possible values for that symbol. 73 class RangeSet { 74 typedef llvm::ImmutableSet<Range, RangeTrait> PrimRangeSet; 75 PrimRangeSet ranges; // no need to make const, since it is an 76 // ImmutableSet - this allows default operator= 77 // to work. 78 public: 79 typedef PrimRangeSet::Factory Factory; 80 typedef PrimRangeSet::iterator iterator; 81 82 RangeSet(PrimRangeSet RS) : ranges(RS) {} 83 84 iterator begin() const { return ranges.begin(); } 85 iterator end() const { return ranges.end(); } 86 87 bool isEmpty() const { return ranges.isEmpty(); } 88 89 /// Construct a new RangeSet representing '{ [from, to] }'. 90 RangeSet(Factory &F, const llvm::APSInt &from, const llvm::APSInt &to) 91 : ranges(F.add(F.getEmptySet(), Range(from, to))) {} 92 93 /// Profile - Generates a hash profile of this RangeSet for use 94 /// by FoldingSet. 95 void Profile(llvm::FoldingSetNodeID &ID) const { ranges.Profile(ID); } 96 97 /// getConcreteValue - If a symbol is contrained to equal a specific integer 98 /// constant then this method returns that value. Otherwise, it returns 99 /// NULL. 100 const llvm::APSInt* getConcreteValue() const { 101 return ranges.isSingleton() ? ranges.begin()->getConcreteValue() : 0; 102 } 103 104 private: 105 void IntersectInRange(BasicValueFactory &BV, Factory &F, 106 const llvm::APSInt &Lower, 107 const llvm::APSInt &Upper, 108 PrimRangeSet &newRanges, 109 PrimRangeSet::iterator &i, 110 PrimRangeSet::iterator &e) const { 111 // There are six cases for each range R in the set: 112 // 1. R is entirely before the intersection range. 113 // 2. R is entirely after the intersection range. 114 // 3. R contains the entire intersection range. 115 // 4. R starts before the intersection range and ends in the middle. 116 // 5. R starts in the middle of the intersection range and ends after it. 117 // 6. R is entirely contained in the intersection range. 118 // These correspond to each of the conditions below. 119 for (/* i = begin(), e = end() */; i != e; ++i) { 120 if (i->To() < Lower) { 121 continue; 122 } 123 if (i->From() > Upper) { 124 break; 125 } 126 127 if (i->Includes(Lower)) { 128 if (i->Includes(Upper)) { 129 newRanges = F.add(newRanges, Range(BV.getValue(Lower), 130 BV.getValue(Upper))); 131 break; 132 } else 133 newRanges = F.add(newRanges, Range(BV.getValue(Lower), i->To())); 134 } else { 135 if (i->Includes(Upper)) { 136 newRanges = F.add(newRanges, Range(i->From(), BV.getValue(Upper))); 137 break; 138 } else 139 newRanges = F.add(newRanges, *i); 140 } 141 } 142 } 143 144 const llvm::APSInt &getMinValue() const { 145 assert(!isEmpty()); 146 return ranges.begin()->From(); 147 } 148 149 bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const { 150 // This function has nine cases, the cartesian product of range-testing 151 // both the upper and lower bounds against the symbol's type. 152 // Each case requires a different pinning operation. 153 // The function returns false if the described range is entirely outside 154 // the range of values for the associated symbol. 155 APSIntType Type(getMinValue()); 156 APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower, true); 157 APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper, true); 158 159 switch (LowerTest) { 160 case APSIntType::RTR_Below: 161 switch (UpperTest) { 162 case APSIntType::RTR_Below: 163 // The entire range is outside the symbol's set of possible values. 164 // If this is a conventionally-ordered range, the state is infeasible. 165 if (Lower < Upper) 166 return false; 167 168 // However, if the range wraps around, it spans all possible values. 169 Lower = Type.getMinValue(); 170 Upper = Type.getMaxValue(); 171 break; 172 case APSIntType::RTR_Within: 173 // The range starts below what's possible but ends within it. Pin. 174 Lower = Type.getMinValue(); 175 Type.apply(Upper); 176 break; 177 case APSIntType::RTR_Above: 178 // The range spans all possible values for the symbol. Pin. 179 Lower = Type.getMinValue(); 180 Upper = Type.getMaxValue(); 181 break; 182 } 183 break; 184 case APSIntType::RTR_Within: 185 switch (UpperTest) { 186 case APSIntType::RTR_Below: 187 // The range wraps around, but all lower values are not possible. 188 Type.apply(Lower); 189 Upper = Type.getMaxValue(); 190 break; 191 case APSIntType::RTR_Within: 192 // The range may or may not wrap around, but both limits are valid. 193 Type.apply(Lower); 194 Type.apply(Upper); 195 break; 196 case APSIntType::RTR_Above: 197 // The range starts within what's possible but ends above it. Pin. 198 Type.apply(Lower); 199 Upper = Type.getMaxValue(); 200 break; 201 } 202 break; 203 case APSIntType::RTR_Above: 204 switch (UpperTest) { 205 case APSIntType::RTR_Below: 206 // The range wraps but is outside the symbol's set of possible values. 207 return false; 208 case APSIntType::RTR_Within: 209 // The range starts above what's possible but ends within it (wrap). 210 Lower = Type.getMinValue(); 211 Type.apply(Upper); 212 break; 213 case APSIntType::RTR_Above: 214 // The entire range is outside the symbol's set of possible values. 215 // If this is a conventionally-ordered range, the state is infeasible. 216 if (Lower < Upper) 217 return false; 218 219 // However, if the range wraps around, it spans all possible values. 220 Lower = Type.getMinValue(); 221 Upper = Type.getMaxValue(); 222 break; 223 } 224 break; 225 } 226 227 return true; 228 } 229 230 public: 231 // Returns a set containing the values in the receiving set, intersected with 232 // the closed range [Lower, Upper]. Unlike the Range type, this range uses 233 // modular arithmetic, corresponding to the common treatment of C integer 234 // overflow. Thus, if the Lower bound is greater than the Upper bound, the 235 // range is taken to wrap around. This is equivalent to taking the 236 // intersection with the two ranges [Min, Upper] and [Lower, Max], 237 // or, alternatively, /removing/ all integers between Upper and Lower. 238 RangeSet Intersect(BasicValueFactory &BV, Factory &F, 239 llvm::APSInt Lower, llvm::APSInt Upper) const { 240 if (!pin(Lower, Upper)) 241 return F.getEmptySet(); 242 243 PrimRangeSet newRanges = F.getEmptySet(); 244 245 PrimRangeSet::iterator i = begin(), e = end(); 246 if (Lower <= Upper) 247 IntersectInRange(BV, F, Lower, Upper, newRanges, i, e); 248 else { 249 // The order of the next two statements is important! 250 // IntersectInRange() does not reset the iteration state for i and e. 251 // Therefore, the lower range most be handled first. 252 IntersectInRange(BV, F, BV.getMinValue(Upper), Upper, newRanges, i, e); 253 IntersectInRange(BV, F, Lower, BV.getMaxValue(Lower), newRanges, i, e); 254 } 255 256 return newRanges; 257 } 258 259 void print(raw_ostream &os) const { 260 bool isFirst = true; 261 os << "{ "; 262 for (iterator i = begin(), e = end(); i != e; ++i) { 263 if (isFirst) 264 isFirst = false; 265 else 266 os << ", "; 267 268 os << '[' << i->From().toString(10) << ", " << i->To().toString(10) 269 << ']'; 270 } 271 os << " }"; 272 } 273 274 bool operator==(const RangeSet &other) const { 275 return ranges == other.ranges; 276 } 277 }; 278 } // end anonymous namespace 279 280 REGISTER_TRAIT_WITH_PROGRAMSTATE(ConstraintRange, 281 CLANG_ENTO_PROGRAMSTATE_MAP(SymbolRef, 282 RangeSet)) 283 284 namespace { 285 class RangeConstraintManager : public SimpleConstraintManager{ 286 RangeSet GetRange(ProgramStateRef state, SymbolRef sym); 287 public: 288 RangeConstraintManager(SubEngine *subengine, SValBuilder &SVB) 289 : SimpleConstraintManager(subengine, SVB) {} 290 291 ProgramStateRef assumeSymNE(ProgramStateRef state, SymbolRef sym, 292 const llvm::APSInt& Int, 293 const llvm::APSInt& Adjustment); 294 295 ProgramStateRef assumeSymEQ(ProgramStateRef state, SymbolRef sym, 296 const llvm::APSInt& Int, 297 const llvm::APSInt& Adjustment); 298 299 ProgramStateRef assumeSymLT(ProgramStateRef state, SymbolRef sym, 300 const llvm::APSInt& Int, 301 const llvm::APSInt& Adjustment); 302 303 ProgramStateRef assumeSymGT(ProgramStateRef state, SymbolRef sym, 304 const llvm::APSInt& Int, 305 const llvm::APSInt& Adjustment); 306 307 ProgramStateRef assumeSymGE(ProgramStateRef state, SymbolRef sym, 308 const llvm::APSInt& Int, 309 const llvm::APSInt& Adjustment); 310 311 ProgramStateRef assumeSymLE(ProgramStateRef state, SymbolRef sym, 312 const llvm::APSInt& Int, 313 const llvm::APSInt& Adjustment); 314 315 const llvm::APSInt* getSymVal(ProgramStateRef St, SymbolRef sym) const; 316 ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym); 317 318 ProgramStateRef removeDeadBindings(ProgramStateRef St, SymbolReaper& SymReaper); 319 320 void print(ProgramStateRef St, raw_ostream &Out, 321 const char* nl, const char *sep); 322 323 private: 324 RangeSet::Factory F; 325 }; 326 327 } // end anonymous namespace 328 329 ConstraintManager * 330 ento::CreateRangeConstraintManager(ProgramStateManager &StMgr, SubEngine *Eng) { 331 return new RangeConstraintManager(Eng, StMgr.getSValBuilder()); 332 } 333 334 const llvm::APSInt* RangeConstraintManager::getSymVal(ProgramStateRef St, 335 SymbolRef sym) const { 336 const ConstraintRangeTy::data_type *T = St->get<ConstraintRange>(sym); 337 return T ? T->getConcreteValue() : NULL; 338 } 339 340 ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State, 341 SymbolRef Sym) { 342 const RangeSet *Ranges = State->get<ConstraintRange>(Sym); 343 344 // If we don't have any information about this symbol, it's underconstrained. 345 if (!Ranges) 346 return ConditionTruthVal(); 347 348 // If we have a concrete value, see if it's zero. 349 if (const llvm::APSInt *Value = Ranges->getConcreteValue()) 350 return *Value == 0; 351 352 BasicValueFactory &BV = getBasicVals(); 353 APSIntType IntType = BV.getAPSIntType(Sym->getType()); 354 llvm::APSInt Zero = IntType.getZeroValue(); 355 356 // Check if zero is in the set of possible values. 357 if (Ranges->Intersect(BV, F, Zero, Zero).isEmpty()) 358 return false; 359 360 // Zero is a possible value, but it is not the /only/ possible value. 361 return ConditionTruthVal(); 362 } 363 364 /// Scan all symbols referenced by the constraints. If the symbol is not alive 365 /// as marked in LSymbols, mark it as dead in DSymbols. 366 ProgramStateRef 367 RangeConstraintManager::removeDeadBindings(ProgramStateRef state, 368 SymbolReaper& SymReaper) { 369 370 ConstraintRangeTy CR = state->get<ConstraintRange>(); 371 ConstraintRangeTy::Factory& CRFactory = state->get_context<ConstraintRange>(); 372 373 for (ConstraintRangeTy::iterator I = CR.begin(), E = CR.end(); I != E; ++I) { 374 SymbolRef sym = I.getKey(); 375 if (SymReaper.maybeDead(sym)) 376 CR = CRFactory.remove(CR, sym); 377 } 378 379 return state->set<ConstraintRange>(CR); 380 } 381 382 RangeSet 383 RangeConstraintManager::GetRange(ProgramStateRef state, SymbolRef sym) { 384 if (ConstraintRangeTy::data_type* V = state->get<ConstraintRange>(sym)) 385 return *V; 386 387 // Lazily generate a new RangeSet representing all possible values for the 388 // given symbol type. 389 BasicValueFactory &BV = getBasicVals(); 390 QualType T = sym->getType(); 391 392 RangeSet Result(F, BV.getMinValue(T), BV.getMaxValue(T)); 393 394 // Special case: references are known to be non-zero. 395 if (T->isReferenceType()) { 396 APSIntType IntType = BV.getAPSIntType(T); 397 Result = Result.Intersect(BV, F, ++IntType.getZeroValue(), 398 --IntType.getZeroValue()); 399 } 400 401 return Result; 402 } 403 404 //===------------------------------------------------------------------------=== 405 // assumeSymX methods: public interface for RangeConstraintManager. 406 //===------------------------------------------------------------------------===/ 407 408 // The syntax for ranges below is mathematical, using [x, y] for closed ranges 409 // and (x, y) for open ranges. These ranges are modular, corresponding with 410 // a common treatment of C integer overflow. This means that these methods 411 // do not have to worry about overflow; RangeSet::Intersect can handle such a 412 // "wraparound" range. 413 // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1, 414 // UINT_MAX, 0, 1, and 2. 415 416 ProgramStateRef 417 RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym, 418 const llvm::APSInt &Int, 419 const llvm::APSInt &Adjustment) { 420 // Before we do any real work, see if the value can even show up. 421 APSIntType AdjustmentType(Adjustment); 422 if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within) 423 return St; 424 425 llvm::APSInt Lower = AdjustmentType.convert(Int) - Adjustment; 426 llvm::APSInt Upper = Lower; 427 --Lower; 428 ++Upper; 429 430 // [Int-Adjustment+1, Int-Adjustment-1] 431 // Notice that the lower bound is greater than the upper bound. 432 RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Upper, Lower); 433 return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New); 434 } 435 436 ProgramStateRef 437 RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym, 438 const llvm::APSInt &Int, 439 const llvm::APSInt &Adjustment) { 440 // Before we do any real work, see if the value can even show up. 441 APSIntType AdjustmentType(Adjustment); 442 if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within) 443 return NULL; 444 445 // [Int-Adjustment, Int-Adjustment] 446 llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment; 447 RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, AdjInt, AdjInt); 448 return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New); 449 } 450 451 ProgramStateRef 452 RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym, 453 const llvm::APSInt &Int, 454 const llvm::APSInt &Adjustment) { 455 // Before we do any real work, see if the value can even show up. 456 APSIntType AdjustmentType(Adjustment); 457 switch (AdjustmentType.testInRange(Int, true)) { 458 case APSIntType::RTR_Below: 459 return NULL; 460 case APSIntType::RTR_Within: 461 break; 462 case APSIntType::RTR_Above: 463 return St; 464 } 465 466 // Special case for Int == Min. This is always false. 467 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int); 468 llvm::APSInt Min = AdjustmentType.getMinValue(); 469 if (ComparisonVal == Min) 470 return NULL; 471 472 llvm::APSInt Lower = Min-Adjustment; 473 llvm::APSInt Upper = ComparisonVal-Adjustment; 474 --Upper; 475 476 RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper); 477 return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New); 478 } 479 480 ProgramStateRef 481 RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym, 482 const llvm::APSInt &Int, 483 const llvm::APSInt &Adjustment) { 484 // Before we do any real work, see if the value can even show up. 485 APSIntType AdjustmentType(Adjustment); 486 switch (AdjustmentType.testInRange(Int, true)) { 487 case APSIntType::RTR_Below: 488 return St; 489 case APSIntType::RTR_Within: 490 break; 491 case APSIntType::RTR_Above: 492 return NULL; 493 } 494 495 // Special case for Int == Max. This is always false. 496 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int); 497 llvm::APSInt Max = AdjustmentType.getMaxValue(); 498 if (ComparisonVal == Max) 499 return NULL; 500 501 llvm::APSInt Lower = ComparisonVal-Adjustment; 502 llvm::APSInt Upper = Max-Adjustment; 503 ++Lower; 504 505 RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper); 506 return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New); 507 } 508 509 ProgramStateRef 510 RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym, 511 const llvm::APSInt &Int, 512 const llvm::APSInt &Adjustment) { 513 // Before we do any real work, see if the value can even show up. 514 APSIntType AdjustmentType(Adjustment); 515 switch (AdjustmentType.testInRange(Int, true)) { 516 case APSIntType::RTR_Below: 517 return St; 518 case APSIntType::RTR_Within: 519 break; 520 case APSIntType::RTR_Above: 521 return NULL; 522 } 523 524 // Special case for Int == Min. This is always feasible. 525 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int); 526 llvm::APSInt Min = AdjustmentType.getMinValue(); 527 if (ComparisonVal == Min) 528 return St; 529 530 llvm::APSInt Max = AdjustmentType.getMaxValue(); 531 llvm::APSInt Lower = ComparisonVal-Adjustment; 532 llvm::APSInt Upper = Max-Adjustment; 533 534 RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper); 535 return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New); 536 } 537 538 ProgramStateRef 539 RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym, 540 const llvm::APSInt &Int, 541 const llvm::APSInt &Adjustment) { 542 // Before we do any real work, see if the value can even show up. 543 APSIntType AdjustmentType(Adjustment); 544 switch (AdjustmentType.testInRange(Int, true)) { 545 case APSIntType::RTR_Below: 546 return NULL; 547 case APSIntType::RTR_Within: 548 break; 549 case APSIntType::RTR_Above: 550 return St; 551 } 552 553 // Special case for Int == Max. This is always feasible. 554 llvm::APSInt ComparisonVal = AdjustmentType.convert(Int); 555 llvm::APSInt Max = AdjustmentType.getMaxValue(); 556 if (ComparisonVal == Max) 557 return St; 558 559 llvm::APSInt Min = AdjustmentType.getMinValue(); 560 llvm::APSInt Lower = Min-Adjustment; 561 llvm::APSInt Upper = ComparisonVal-Adjustment; 562 563 RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper); 564 return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New); 565 } 566 567 //===------------------------------------------------------------------------=== 568 // Pretty-printing. 569 //===------------------------------------------------------------------------===/ 570 571 void RangeConstraintManager::print(ProgramStateRef St, raw_ostream &Out, 572 const char* nl, const char *sep) { 573 574 ConstraintRangeTy Ranges = St->get<ConstraintRange>(); 575 576 if (Ranges.isEmpty()) { 577 Out << nl << sep << "Ranges are empty." << nl; 578 return; 579 } 580 581 Out << nl << sep << "Ranges of symbol values:"; 582 for (ConstraintRangeTy::iterator I=Ranges.begin(), E=Ranges.end(); I!=E; ++I){ 583 Out << nl << ' ' << I.getKey() << " : "; 584 I.getData().print(Out); 585 } 586 Out << nl; 587 } 588