1 //== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines RangeConstraintManager, a class that tracks simple 11 // equality and inequality constraints on symbolic values of ProgramState. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "SimpleConstraintManager.h" 16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" 17 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/ADT/FoldingSet.h" 20 #include "llvm/ADT/ImmutableSet.h" 21 #include "llvm/Support/raw_ostream.h" 22 23 using namespace clang; 24 using namespace ento; 25 26 namespace { class ConstraintRange {}; } 27 static int ConstraintRangeIndex = 0; 28 29 /// A Range represents the closed range [from, to]. The caller must 30 /// guarantee that from <= to. Note that Range is immutable, so as not 31 /// to subvert RangeSet's immutability. 32 namespace { 33 class Range : public std::pair<const llvm::APSInt*, 34 const llvm::APSInt*> { 35 public: 36 Range(const llvm::APSInt &from, const llvm::APSInt &to) 37 : std::pair<const llvm::APSInt*, const llvm::APSInt*>(&from, &to) { 38 assert(from <= to); 39 } 40 bool Includes(const llvm::APSInt &v) const { 41 return *first <= v && v <= *second; 42 } 43 const llvm::APSInt &From() const { 44 return *first; 45 } 46 const llvm::APSInt &To() const { 47 return *second; 48 } 49 const llvm::APSInt *getConcreteValue() const { 50 return &From() == &To() ? &From() : NULL; 51 } 52 53 void Profile(llvm::FoldingSetNodeID &ID) const { 54 ID.AddPointer(&From()); 55 ID.AddPointer(&To()); 56 } 57 }; 58 59 60 class RangeTrait : public llvm::ImutContainerInfo<Range> { 61 public: 62 // When comparing if one Range is less than another, we should compare 63 // the actual APSInt values instead of their pointers. This keeps the order 64 // consistent (instead of comparing by pointer values) and can potentially 65 // be used to speed up some of the operations in RangeSet. 66 static inline bool isLess(key_type_ref lhs, key_type_ref rhs) { 67 return *lhs.first < *rhs.first || (!(*rhs.first < *lhs.first) && 68 *lhs.second < *rhs.second); 69 } 70 }; 71 72 /// RangeSet contains a set of ranges. If the set is empty, then 73 /// there the value of a symbol is overly constrained and there are no 74 /// possible values for that symbol. 75 class RangeSet { 76 typedef llvm::ImmutableSet<Range, RangeTrait> PrimRangeSet; 77 PrimRangeSet ranges; // no need to make const, since it is an 78 // ImmutableSet - this allows default operator= 79 // to work. 80 public: 81 typedef PrimRangeSet::Factory Factory; 82 typedef PrimRangeSet::iterator iterator; 83 84 RangeSet(PrimRangeSet RS) : ranges(RS) {} 85 86 iterator begin() const { return ranges.begin(); } 87 iterator end() const { return ranges.end(); } 88 89 bool isEmpty() const { return ranges.isEmpty(); } 90 91 /// Construct a new RangeSet representing '{ [from, to] }'. 92 RangeSet(Factory &F, const llvm::APSInt &from, const llvm::APSInt &to) 93 : ranges(F.add(F.getEmptySet(), Range(from, to))) {} 94 95 /// Profile - Generates a hash profile of this RangeSet for use 96 /// by FoldingSet. 97 void Profile(llvm::FoldingSetNodeID &ID) const { ranges.Profile(ID); } 98 99 /// getConcreteValue - If a symbol is contrained to equal a specific integer 100 /// constant then this method returns that value. Otherwise, it returns 101 /// NULL. 102 const llvm::APSInt* getConcreteValue() const { 103 return ranges.isSingleton() ? ranges.begin()->getConcreteValue() : 0; 104 } 105 106 private: 107 void IntersectInRange(BasicValueFactory &BV, Factory &F, 108 const llvm::APSInt &Lower, 109 const llvm::APSInt &Upper, 110 PrimRangeSet &newRanges, 111 PrimRangeSet::iterator &i, 112 PrimRangeSet::iterator &e) const { 113 // There are six cases for each range R in the set: 114 // 1. R is entirely before the intersection range. 115 // 2. R is entirely after the intersection range. 116 // 3. R contains the entire intersection range. 117 // 4. R starts before the intersection range and ends in the middle. 118 // 5. R starts in the middle of the intersection range and ends after it. 119 // 6. R is entirely contained in the intersection range. 120 // These correspond to each of the conditions below. 121 for (/* i = begin(), e = end() */; i != e; ++i) { 122 if (i->To() < Lower) { 123 continue; 124 } 125 if (i->From() > Upper) { 126 break; 127 } 128 129 if (i->Includes(Lower)) { 130 if (i->Includes(Upper)) { 131 newRanges = F.add(newRanges, Range(BV.getValue(Lower), 132 BV.getValue(Upper))); 133 break; 134 } else 135 newRanges = F.add(newRanges, Range(BV.getValue(Lower), i->To())); 136 } else { 137 if (i->Includes(Upper)) { 138 newRanges = F.add(newRanges, Range(i->From(), BV.getValue(Upper))); 139 break; 140 } else 141 newRanges = F.add(newRanges, *i); 142 } 143 } 144 } 145 146 public: 147 // Returns a set containing the values in the receiving set, intersected with 148 // the closed range [Lower, Upper]. Unlike the Range type, this range uses 149 // modular arithmetic, corresponding to the common treatment of C integer 150 // overflow. Thus, if the Lower bound is greater than the Upper bound, the 151 // range is taken to wrap around. This is equivalent to taking the 152 // intersection with the two ranges [Min, Upper] and [Lower, Max], 153 // or, alternatively, /removing/ all integers between Upper and Lower. 154 RangeSet Intersect(BasicValueFactory &BV, Factory &F, 155 const llvm::APSInt &Lower, 156 const llvm::APSInt &Upper) const { 157 PrimRangeSet newRanges = F.getEmptySet(); 158 159 PrimRangeSet::iterator i = begin(), e = end(); 160 if (Lower <= Upper) 161 IntersectInRange(BV, F, Lower, Upper, newRanges, i, e); 162 else { 163 // The order of the next two statements is important! 164 // IntersectInRange() does not reset the iteration state for i and e. 165 // Therefore, the lower range most be handled first. 166 IntersectInRange(BV, F, BV.getMinValue(Upper), Upper, newRanges, i, e); 167 IntersectInRange(BV, F, Lower, BV.getMaxValue(Lower), newRanges, i, e); 168 } 169 return newRanges; 170 } 171 172 void print(raw_ostream &os) const { 173 bool isFirst = true; 174 os << "{ "; 175 for (iterator i = begin(), e = end(); i != e; ++i) { 176 if (isFirst) 177 isFirst = false; 178 else 179 os << ", "; 180 181 os << '[' << i->From().toString(10) << ", " << i->To().toString(10) 182 << ']'; 183 } 184 os << " }"; 185 } 186 187 bool operator==(const RangeSet &other) const { 188 return ranges == other.ranges; 189 } 190 }; 191 } // end anonymous namespace 192 193 typedef llvm::ImmutableMap<SymbolRef,RangeSet> ConstraintRangeTy; 194 195 namespace clang { 196 namespace ento { 197 template<> 198 struct ProgramStateTrait<ConstraintRange> 199 : public ProgramStatePartialTrait<ConstraintRangeTy> { 200 static inline void *GDMIndex() { return &ConstraintRangeIndex; } 201 }; 202 } 203 } 204 205 namespace { 206 class RangeConstraintManager : public SimpleConstraintManager{ 207 RangeSet GetRange(ProgramStateRef state, SymbolRef sym); 208 public: 209 RangeConstraintManager(SubEngine &subengine) 210 : SimpleConstraintManager(subengine) {} 211 212 ProgramStateRef assumeSymNE(ProgramStateRef state, SymbolRef sym, 213 const llvm::APSInt& Int, 214 const llvm::APSInt& Adjustment); 215 216 ProgramStateRef assumeSymEQ(ProgramStateRef state, SymbolRef sym, 217 const llvm::APSInt& Int, 218 const llvm::APSInt& Adjustment); 219 220 ProgramStateRef assumeSymLT(ProgramStateRef state, SymbolRef sym, 221 const llvm::APSInt& Int, 222 const llvm::APSInt& Adjustment); 223 224 ProgramStateRef assumeSymGT(ProgramStateRef state, SymbolRef sym, 225 const llvm::APSInt& Int, 226 const llvm::APSInt& Adjustment); 227 228 ProgramStateRef assumeSymGE(ProgramStateRef state, SymbolRef sym, 229 const llvm::APSInt& Int, 230 const llvm::APSInt& Adjustment); 231 232 ProgramStateRef assumeSymLE(ProgramStateRef state, SymbolRef sym, 233 const llvm::APSInt& Int, 234 const llvm::APSInt& Adjustment); 235 236 const llvm::APSInt* getSymVal(ProgramStateRef St, SymbolRef sym) const; 237 238 // FIXME: Refactor into SimpleConstraintManager? 239 bool isEqual(ProgramStateRef St, SymbolRef sym, const llvm::APSInt& V) const { 240 const llvm::APSInt *i = getSymVal(St, sym); 241 return i ? *i == V : false; 242 } 243 244 ProgramStateRef removeDeadBindings(ProgramStateRef St, SymbolReaper& SymReaper); 245 246 void print(ProgramStateRef St, raw_ostream &Out, 247 const char* nl, const char *sep); 248 249 private: 250 RangeSet::Factory F; 251 }; 252 253 } // end anonymous namespace 254 255 ConstraintManager* ento::CreateRangeConstraintManager(ProgramStateManager&, 256 SubEngine &subeng) { 257 return new RangeConstraintManager(subeng); 258 } 259 260 const llvm::APSInt* RangeConstraintManager::getSymVal(ProgramStateRef St, 261 SymbolRef sym) const { 262 const ConstraintRangeTy::data_type *T = St->get<ConstraintRange>(sym); 263 return T ? T->getConcreteValue() : NULL; 264 } 265 266 /// Scan all symbols referenced by the constraints. If the symbol is not alive 267 /// as marked in LSymbols, mark it as dead in DSymbols. 268 ProgramStateRef 269 RangeConstraintManager::removeDeadBindings(ProgramStateRef state, 270 SymbolReaper& SymReaper) { 271 272 ConstraintRangeTy CR = state->get<ConstraintRange>(); 273 ConstraintRangeTy::Factory& CRFactory = state->get_context<ConstraintRange>(); 274 275 for (ConstraintRangeTy::iterator I = CR.begin(), E = CR.end(); I != E; ++I) { 276 SymbolRef sym = I.getKey(); 277 if (SymReaper.maybeDead(sym)) 278 CR = CRFactory.remove(CR, sym); 279 } 280 281 return state->set<ConstraintRange>(CR); 282 } 283 284 RangeSet 285 RangeConstraintManager::GetRange(ProgramStateRef state, SymbolRef sym) { 286 if (ConstraintRangeTy::data_type* V = state->get<ConstraintRange>(sym)) 287 return *V; 288 289 // Lazily generate a new RangeSet representing all possible values for the 290 // given symbol type. 291 QualType T = state->getSymbolManager().getType(sym); 292 BasicValueFactory& BV = state->getBasicVals(); 293 return RangeSet(F, BV.getMinValue(T), BV.getMaxValue(T)); 294 } 295 296 //===------------------------------------------------------------------------=== 297 // assumeSymX methods: public interface for RangeConstraintManager. 298 //===------------------------------------------------------------------------===/ 299 300 // The syntax for ranges below is mathematical, using [x, y] for closed ranges 301 // and (x, y) for open ranges. These ranges are modular, corresponding with 302 // a common treatment of C integer overflow. This means that these methods 303 // do not have to worry about overflow; RangeSet::Intersect can handle such a 304 // "wraparound" range. 305 // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1, 306 // UINT_MAX, 0, 1, and 2. 307 308 ProgramStateRef 309 RangeConstraintManager::assumeSymNE(ProgramStateRef state, SymbolRef sym, 310 const llvm::APSInt& Int, 311 const llvm::APSInt& Adjustment) { 312 BasicValueFactory &BV = state->getBasicVals(); 313 314 llvm::APSInt Lower = Int-Adjustment; 315 llvm::APSInt Upper = Lower; 316 --Lower; 317 ++Upper; 318 319 // [Int-Adjustment+1, Int-Adjustment-1] 320 // Notice that the lower bound is greater than the upper bound. 321 RangeSet New = GetRange(state, sym).Intersect(BV, F, Upper, Lower); 322 return New.isEmpty() ? NULL : state->set<ConstraintRange>(sym, New); 323 } 324 325 ProgramStateRef 326 RangeConstraintManager::assumeSymEQ(ProgramStateRef state, SymbolRef sym, 327 const llvm::APSInt& Int, 328 const llvm::APSInt& Adjustment) { 329 // [Int-Adjustment, Int-Adjustment] 330 BasicValueFactory &BV = state->getBasicVals(); 331 llvm::APSInt AdjInt = Int-Adjustment; 332 RangeSet New = GetRange(state, sym).Intersect(BV, F, AdjInt, AdjInt); 333 return New.isEmpty() ? NULL : state->set<ConstraintRange>(sym, New); 334 } 335 336 ProgramStateRef 337 RangeConstraintManager::assumeSymLT(ProgramStateRef state, SymbolRef sym, 338 const llvm::APSInt& Int, 339 const llvm::APSInt& Adjustment) { 340 BasicValueFactory &BV = state->getBasicVals(); 341 342 QualType T = state->getSymbolManager().getType(sym); 343 const llvm::APSInt &Min = BV.getMinValue(T); 344 345 // Special case for Int == Min. This is always false. 346 if (Int == Min) 347 return NULL; 348 349 llvm::APSInt Lower = Min-Adjustment; 350 llvm::APSInt Upper = Int-Adjustment; 351 --Upper; 352 353 RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper); 354 return New.isEmpty() ? NULL : state->set<ConstraintRange>(sym, New); 355 } 356 357 ProgramStateRef 358 RangeConstraintManager::assumeSymGT(ProgramStateRef state, SymbolRef sym, 359 const llvm::APSInt& Int, 360 const llvm::APSInt& Adjustment) { 361 BasicValueFactory &BV = state->getBasicVals(); 362 363 QualType T = state->getSymbolManager().getType(sym); 364 const llvm::APSInt &Max = BV.getMaxValue(T); 365 366 // Special case for Int == Max. This is always false. 367 if (Int == Max) 368 return NULL; 369 370 llvm::APSInt Lower = Int-Adjustment; 371 llvm::APSInt Upper = Max-Adjustment; 372 ++Lower; 373 374 RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper); 375 return New.isEmpty() ? NULL : state->set<ConstraintRange>(sym, New); 376 } 377 378 ProgramStateRef 379 RangeConstraintManager::assumeSymGE(ProgramStateRef state, SymbolRef sym, 380 const llvm::APSInt& Int, 381 const llvm::APSInt& Adjustment) { 382 BasicValueFactory &BV = state->getBasicVals(); 383 384 QualType T = state->getSymbolManager().getType(sym); 385 const llvm::APSInt &Min = BV.getMinValue(T); 386 387 // Special case for Int == Min. This is always feasible. 388 if (Int == Min) 389 return state; 390 391 const llvm::APSInt &Max = BV.getMaxValue(T); 392 393 llvm::APSInt Lower = Int-Adjustment; 394 llvm::APSInt Upper = Max-Adjustment; 395 396 RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper); 397 return New.isEmpty() ? NULL : state->set<ConstraintRange>(sym, New); 398 } 399 400 ProgramStateRef 401 RangeConstraintManager::assumeSymLE(ProgramStateRef state, SymbolRef sym, 402 const llvm::APSInt& Int, 403 const llvm::APSInt& Adjustment) { 404 BasicValueFactory &BV = state->getBasicVals(); 405 406 QualType T = state->getSymbolManager().getType(sym); 407 const llvm::APSInt &Max = BV.getMaxValue(T); 408 409 // Special case for Int == Max. This is always feasible. 410 if (Int == Max) 411 return state; 412 413 const llvm::APSInt &Min = BV.getMinValue(T); 414 415 llvm::APSInt Lower = Min-Adjustment; 416 llvm::APSInt Upper = Int-Adjustment; 417 418 RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper); 419 return New.isEmpty() ? NULL : state->set<ConstraintRange>(sym, New); 420 } 421 422 //===------------------------------------------------------------------------=== 423 // Pretty-printing. 424 //===------------------------------------------------------------------------===/ 425 426 void RangeConstraintManager::print(ProgramStateRef St, raw_ostream &Out, 427 const char* nl, const char *sep) { 428 429 ConstraintRangeTy Ranges = St->get<ConstraintRange>(); 430 431 if (Ranges.isEmpty()) { 432 Out << nl << sep << "Ranges are empty." << nl; 433 return; 434 } 435 436 Out << nl << sep << "Ranges of symbol values:"; 437 for (ConstraintRangeTy::iterator I=Ranges.begin(), E=Ranges.end(); I!=E; ++I){ 438 Out << nl << ' ' << I.getKey() << " : "; 439 I.getData().print(Out); 440 } 441 Out << nl; 442 } 443