Home | History | Annotate | Download | only in Core
      1 //== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 //  This file defines RangeConstraintManager, a class that tracks simple
     11 //  equality and inequality constraints on symbolic values of ProgramState.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "SimpleConstraintManager.h"
     16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
     17 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
     18 #include "llvm/Support/Debug.h"
     19 #include "llvm/ADT/FoldingSet.h"
     20 #include "llvm/ADT/ImmutableSet.h"
     21 #include "llvm/Support/raw_ostream.h"
     22 
     23 using namespace clang;
     24 using namespace ento;
     25 
     26 namespace { class ConstraintRange {}; }
     27 static int ConstraintRangeIndex = 0;
     28 
     29 /// A Range represents the closed range [from, to].  The caller must
     30 /// guarantee that from <= to.  Note that Range is immutable, so as not
     31 /// to subvert RangeSet's immutability.
     32 namespace {
     33 class Range : public std::pair<const llvm::APSInt*,
     34                                                 const llvm::APSInt*> {
     35 public:
     36   Range(const llvm::APSInt &from, const llvm::APSInt &to)
     37     : std::pair<const llvm::APSInt*, const llvm::APSInt*>(&from, &to) {
     38     assert(from <= to);
     39   }
     40   bool Includes(const llvm::APSInt &v) const {
     41     return *first <= v && v <= *second;
     42   }
     43   const llvm::APSInt &From() const {
     44     return *first;
     45   }
     46   const llvm::APSInt &To() const {
     47     return *second;
     48   }
     49   const llvm::APSInt *getConcreteValue() const {
     50     return &From() == &To() ? &From() : NULL;
     51   }
     52 
     53   void Profile(llvm::FoldingSetNodeID &ID) const {
     54     ID.AddPointer(&From());
     55     ID.AddPointer(&To());
     56   }
     57 };
     58 
     59 
     60 class RangeTrait : public llvm::ImutContainerInfo<Range> {
     61 public:
     62   // When comparing if one Range is less than another, we should compare
     63   // the actual APSInt values instead of their pointers.  This keeps the order
     64   // consistent (instead of comparing by pointer values) and can potentially
     65   // be used to speed up some of the operations in RangeSet.
     66   static inline bool isLess(key_type_ref lhs, key_type_ref rhs) {
     67     return *lhs.first < *rhs.first || (!(*rhs.first < *lhs.first) &&
     68                                        *lhs.second < *rhs.second);
     69   }
     70 };
     71 
     72 /// RangeSet contains a set of ranges. If the set is empty, then
     73 ///  there the value of a symbol is overly constrained and there are no
     74 ///  possible values for that symbol.
     75 class RangeSet {
     76   typedef llvm::ImmutableSet<Range, RangeTrait> PrimRangeSet;
     77   PrimRangeSet ranges; // no need to make const, since it is an
     78                        // ImmutableSet - this allows default operator=
     79                        // to work.
     80 public:
     81   typedef PrimRangeSet::Factory Factory;
     82   typedef PrimRangeSet::iterator iterator;
     83 
     84   RangeSet(PrimRangeSet RS) : ranges(RS) {}
     85 
     86   iterator begin() const { return ranges.begin(); }
     87   iterator end() const { return ranges.end(); }
     88 
     89   bool isEmpty() const { return ranges.isEmpty(); }
     90 
     91   /// Construct a new RangeSet representing '{ [from, to] }'.
     92   RangeSet(Factory &F, const llvm::APSInt &from, const llvm::APSInt &to)
     93     : ranges(F.add(F.getEmptySet(), Range(from, to))) {}
     94 
     95   /// Profile - Generates a hash profile of this RangeSet for use
     96   ///  by FoldingSet.
     97   void Profile(llvm::FoldingSetNodeID &ID) const { ranges.Profile(ID); }
     98 
     99   /// getConcreteValue - If a symbol is contrained to equal a specific integer
    100   ///  constant then this method returns that value.  Otherwise, it returns
    101   ///  NULL.
    102   const llvm::APSInt* getConcreteValue() const {
    103     return ranges.isSingleton() ? ranges.begin()->getConcreteValue() : 0;
    104   }
    105 
    106 private:
    107   void IntersectInRange(BasicValueFactory &BV, Factory &F,
    108                         const llvm::APSInt &Lower,
    109                         const llvm::APSInt &Upper,
    110                         PrimRangeSet &newRanges,
    111                         PrimRangeSet::iterator &i,
    112                         PrimRangeSet::iterator &e) const {
    113     // There are six cases for each range R in the set:
    114     //   1. R is entirely before the intersection range.
    115     //   2. R is entirely after the intersection range.
    116     //   3. R contains the entire intersection range.
    117     //   4. R starts before the intersection range and ends in the middle.
    118     //   5. R starts in the middle of the intersection range and ends after it.
    119     //   6. R is entirely contained in the intersection range.
    120     // These correspond to each of the conditions below.
    121     for (/* i = begin(), e = end() */; i != e; ++i) {
    122       if (i->To() < Lower) {
    123         continue;
    124       }
    125       if (i->From() > Upper) {
    126         break;
    127       }
    128 
    129       if (i->Includes(Lower)) {
    130         if (i->Includes(Upper)) {
    131           newRanges = F.add(newRanges, Range(BV.getValue(Lower),
    132                                              BV.getValue(Upper)));
    133           break;
    134         } else
    135           newRanges = F.add(newRanges, Range(BV.getValue(Lower), i->To()));
    136       } else {
    137         if (i->Includes(Upper)) {
    138           newRanges = F.add(newRanges, Range(i->From(), BV.getValue(Upper)));
    139           break;
    140         } else
    141           newRanges = F.add(newRanges, *i);
    142       }
    143     }
    144   }
    145 
    146 public:
    147   // Returns a set containing the values in the receiving set, intersected with
    148   // the closed range [Lower, Upper]. Unlike the Range type, this range uses
    149   // modular arithmetic, corresponding to the common treatment of C integer
    150   // overflow. Thus, if the Lower bound is greater than the Upper bound, the
    151   // range is taken to wrap around. This is equivalent to taking the
    152   // intersection with the two ranges [Min, Upper] and [Lower, Max],
    153   // or, alternatively, /removing/ all integers between Upper and Lower.
    154   RangeSet Intersect(BasicValueFactory &BV, Factory &F,
    155                      const llvm::APSInt &Lower,
    156                      const llvm::APSInt &Upper) const {
    157     PrimRangeSet newRanges = F.getEmptySet();
    158 
    159     PrimRangeSet::iterator i = begin(), e = end();
    160     if (Lower <= Upper)
    161       IntersectInRange(BV, F, Lower, Upper, newRanges, i, e);
    162     else {
    163       // The order of the next two statements is important!
    164       // IntersectInRange() does not reset the iteration state for i and e.
    165       // Therefore, the lower range most be handled first.
    166       IntersectInRange(BV, F, BV.getMinValue(Upper), Upper, newRanges, i, e);
    167       IntersectInRange(BV, F, Lower, BV.getMaxValue(Lower), newRanges, i, e);
    168     }
    169     return newRanges;
    170   }
    171 
    172   void print(raw_ostream &os) const {
    173     bool isFirst = true;
    174     os << "{ ";
    175     for (iterator i = begin(), e = end(); i != e; ++i) {
    176       if (isFirst)
    177         isFirst = false;
    178       else
    179         os << ", ";
    180 
    181       os << '[' << i->From().toString(10) << ", " << i->To().toString(10)
    182          << ']';
    183     }
    184     os << " }";
    185   }
    186 
    187   bool operator==(const RangeSet &other) const {
    188     return ranges == other.ranges;
    189   }
    190 };
    191 } // end anonymous namespace
    192 
    193 typedef llvm::ImmutableMap<SymbolRef,RangeSet> ConstraintRangeTy;
    194 
    195 namespace clang {
    196 namespace ento {
    197 template<>
    198 struct ProgramStateTrait<ConstraintRange>
    199   : public ProgramStatePartialTrait<ConstraintRangeTy> {
    200   static inline void *GDMIndex() { return &ConstraintRangeIndex; }
    201 };
    202 }
    203 }
    204 
    205 namespace {
    206 class RangeConstraintManager : public SimpleConstraintManager{
    207   RangeSet GetRange(ProgramStateRef state, SymbolRef sym);
    208 public:
    209   RangeConstraintManager(SubEngine &subengine)
    210     : SimpleConstraintManager(subengine) {}
    211 
    212   ProgramStateRef assumeSymNE(ProgramStateRef state, SymbolRef sym,
    213                              const llvm::APSInt& Int,
    214                              const llvm::APSInt& Adjustment);
    215 
    216   ProgramStateRef assumeSymEQ(ProgramStateRef state, SymbolRef sym,
    217                              const llvm::APSInt& Int,
    218                              const llvm::APSInt& Adjustment);
    219 
    220   ProgramStateRef assumeSymLT(ProgramStateRef state, SymbolRef sym,
    221                              const llvm::APSInt& Int,
    222                              const llvm::APSInt& Adjustment);
    223 
    224   ProgramStateRef assumeSymGT(ProgramStateRef state, SymbolRef sym,
    225                              const llvm::APSInt& Int,
    226                              const llvm::APSInt& Adjustment);
    227 
    228   ProgramStateRef assumeSymGE(ProgramStateRef state, SymbolRef sym,
    229                              const llvm::APSInt& Int,
    230                              const llvm::APSInt& Adjustment);
    231 
    232   ProgramStateRef assumeSymLE(ProgramStateRef state, SymbolRef sym,
    233                              const llvm::APSInt& Int,
    234                              const llvm::APSInt& Adjustment);
    235 
    236   const llvm::APSInt* getSymVal(ProgramStateRef St, SymbolRef sym) const;
    237 
    238   // FIXME: Refactor into SimpleConstraintManager?
    239   bool isEqual(ProgramStateRef St, SymbolRef sym, const llvm::APSInt& V) const {
    240     const llvm::APSInt *i = getSymVal(St, sym);
    241     return i ? *i == V : false;
    242   }
    243 
    244   ProgramStateRef removeDeadBindings(ProgramStateRef St, SymbolReaper& SymReaper);
    245 
    246   void print(ProgramStateRef St, raw_ostream &Out,
    247              const char* nl, const char *sep);
    248 
    249 private:
    250   RangeSet::Factory F;
    251 };
    252 
    253 } // end anonymous namespace
    254 
    255 ConstraintManager* ento::CreateRangeConstraintManager(ProgramStateManager&,
    256                                                     SubEngine &subeng) {
    257   return new RangeConstraintManager(subeng);
    258 }
    259 
    260 const llvm::APSInt* RangeConstraintManager::getSymVal(ProgramStateRef St,
    261                                                       SymbolRef sym) const {
    262   const ConstraintRangeTy::data_type *T = St->get<ConstraintRange>(sym);
    263   return T ? T->getConcreteValue() : NULL;
    264 }
    265 
    266 /// Scan all symbols referenced by the constraints. If the symbol is not alive
    267 /// as marked in LSymbols, mark it as dead in DSymbols.
    268 ProgramStateRef
    269 RangeConstraintManager::removeDeadBindings(ProgramStateRef state,
    270                                            SymbolReaper& SymReaper) {
    271 
    272   ConstraintRangeTy CR = state->get<ConstraintRange>();
    273   ConstraintRangeTy::Factory& CRFactory = state->get_context<ConstraintRange>();
    274 
    275   for (ConstraintRangeTy::iterator I = CR.begin(), E = CR.end(); I != E; ++I) {
    276     SymbolRef sym = I.getKey();
    277     if (SymReaper.maybeDead(sym))
    278       CR = CRFactory.remove(CR, sym);
    279   }
    280 
    281   return state->set<ConstraintRange>(CR);
    282 }
    283 
    284 RangeSet
    285 RangeConstraintManager::GetRange(ProgramStateRef state, SymbolRef sym) {
    286   if (ConstraintRangeTy::data_type* V = state->get<ConstraintRange>(sym))
    287     return *V;
    288 
    289   // Lazily generate a new RangeSet representing all possible values for the
    290   // given symbol type.
    291   QualType T = state->getSymbolManager().getType(sym);
    292   BasicValueFactory& BV = state->getBasicVals();
    293   return RangeSet(F, BV.getMinValue(T), BV.getMaxValue(T));
    294 }
    295 
    296 //===------------------------------------------------------------------------===
    297 // assumeSymX methods: public interface for RangeConstraintManager.
    298 //===------------------------------------------------------------------------===/
    299 
    300 // The syntax for ranges below is mathematical, using [x, y] for closed ranges
    301 // and (x, y) for open ranges. These ranges are modular, corresponding with
    302 // a common treatment of C integer overflow. This means that these methods
    303 // do not have to worry about overflow; RangeSet::Intersect can handle such a
    304 // "wraparound" range.
    305 // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
    306 // UINT_MAX, 0, 1, and 2.
    307 
    308 ProgramStateRef
    309 RangeConstraintManager::assumeSymNE(ProgramStateRef state, SymbolRef sym,
    310                                     const llvm::APSInt& Int,
    311                                     const llvm::APSInt& Adjustment) {
    312   BasicValueFactory &BV = state->getBasicVals();
    313 
    314   llvm::APSInt Lower = Int-Adjustment;
    315   llvm::APSInt Upper = Lower;
    316   --Lower;
    317   ++Upper;
    318 
    319   // [Int-Adjustment+1, Int-Adjustment-1]
    320   // Notice that the lower bound is greater than the upper bound.
    321   RangeSet New = GetRange(state, sym).Intersect(BV, F, Upper, Lower);
    322   return New.isEmpty() ? NULL : state->set<ConstraintRange>(sym, New);
    323 }
    324 
    325 ProgramStateRef
    326 RangeConstraintManager::assumeSymEQ(ProgramStateRef state, SymbolRef sym,
    327                                     const llvm::APSInt& Int,
    328                                     const llvm::APSInt& Adjustment) {
    329   // [Int-Adjustment, Int-Adjustment]
    330   BasicValueFactory &BV = state->getBasicVals();
    331   llvm::APSInt AdjInt = Int-Adjustment;
    332   RangeSet New = GetRange(state, sym).Intersect(BV, F, AdjInt, AdjInt);
    333   return New.isEmpty() ? NULL : state->set<ConstraintRange>(sym, New);
    334 }
    335 
    336 ProgramStateRef
    337 RangeConstraintManager::assumeSymLT(ProgramStateRef state, SymbolRef sym,
    338                                     const llvm::APSInt& Int,
    339                                     const llvm::APSInt& Adjustment) {
    340   BasicValueFactory &BV = state->getBasicVals();
    341 
    342   QualType T = state->getSymbolManager().getType(sym);
    343   const llvm::APSInt &Min = BV.getMinValue(T);
    344 
    345   // Special case for Int == Min. This is always false.
    346   if (Int == Min)
    347     return NULL;
    348 
    349   llvm::APSInt Lower = Min-Adjustment;
    350   llvm::APSInt Upper = Int-Adjustment;
    351   --Upper;
    352 
    353   RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper);
    354   return New.isEmpty() ? NULL : state->set<ConstraintRange>(sym, New);
    355 }
    356 
    357 ProgramStateRef
    358 RangeConstraintManager::assumeSymGT(ProgramStateRef state, SymbolRef sym,
    359                                     const llvm::APSInt& Int,
    360                                     const llvm::APSInt& Adjustment) {
    361   BasicValueFactory &BV = state->getBasicVals();
    362 
    363   QualType T = state->getSymbolManager().getType(sym);
    364   const llvm::APSInt &Max = BV.getMaxValue(T);
    365 
    366   // Special case for Int == Max. This is always false.
    367   if (Int == Max)
    368     return NULL;
    369 
    370   llvm::APSInt Lower = Int-Adjustment;
    371   llvm::APSInt Upper = Max-Adjustment;
    372   ++Lower;
    373 
    374   RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper);
    375   return New.isEmpty() ? NULL : state->set<ConstraintRange>(sym, New);
    376 }
    377 
    378 ProgramStateRef
    379 RangeConstraintManager::assumeSymGE(ProgramStateRef state, SymbolRef sym,
    380                                     const llvm::APSInt& Int,
    381                                     const llvm::APSInt& Adjustment) {
    382   BasicValueFactory &BV = state->getBasicVals();
    383 
    384   QualType T = state->getSymbolManager().getType(sym);
    385   const llvm::APSInt &Min = BV.getMinValue(T);
    386 
    387   // Special case for Int == Min. This is always feasible.
    388   if (Int == Min)
    389     return state;
    390 
    391   const llvm::APSInt &Max = BV.getMaxValue(T);
    392 
    393   llvm::APSInt Lower = Int-Adjustment;
    394   llvm::APSInt Upper = Max-Adjustment;
    395 
    396   RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper);
    397   return New.isEmpty() ? NULL : state->set<ConstraintRange>(sym, New);
    398 }
    399 
    400 ProgramStateRef
    401 RangeConstraintManager::assumeSymLE(ProgramStateRef state, SymbolRef sym,
    402                                     const llvm::APSInt& Int,
    403                                     const llvm::APSInt& Adjustment) {
    404   BasicValueFactory &BV = state->getBasicVals();
    405 
    406   QualType T = state->getSymbolManager().getType(sym);
    407   const llvm::APSInt &Max = BV.getMaxValue(T);
    408 
    409   // Special case for Int == Max. This is always feasible.
    410   if (Int == Max)
    411     return state;
    412 
    413   const llvm::APSInt &Min = BV.getMinValue(T);
    414 
    415   llvm::APSInt Lower = Min-Adjustment;
    416   llvm::APSInt Upper = Int-Adjustment;
    417 
    418   RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper);
    419   return New.isEmpty() ? NULL : state->set<ConstraintRange>(sym, New);
    420 }
    421 
    422 //===------------------------------------------------------------------------===
    423 // Pretty-printing.
    424 //===------------------------------------------------------------------------===/
    425 
    426 void RangeConstraintManager::print(ProgramStateRef St, raw_ostream &Out,
    427                                    const char* nl, const char *sep) {
    428 
    429   ConstraintRangeTy Ranges = St->get<ConstraintRange>();
    430 
    431   if (Ranges.isEmpty()) {
    432     Out << nl << sep << "Ranges are empty." << nl;
    433     return;
    434   }
    435 
    436   Out << nl << sep << "Ranges of symbol values:";
    437   for (ConstraintRangeTy::iterator I=Ranges.begin(), E=Ranges.end(); I!=E; ++I){
    438     Out << nl << ' ' << I.getKey() << " : ";
    439     I.getData().print(Out);
    440   }
    441   Out << nl;
    442 }
    443