Home | History | Annotate | Download | only in fst
      1 // interval-set.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Class to represent and operate on sets of intervals.
     20 
     21 #ifndef FST_LIB_INTERVAL_SET_H__
     22 #define FST_LIB_INTERVAL_SET_H__
     23 
     24 #include <iostream>
     25 #include <vector>
     26 using std::vector;
     27 
     28 
     29 #include <fst/util.h>
     30 
     31 
     32 namespace fst {
     33 
     34 // Stores and operates on a set of half-open integral intervals [a,b)
     35 // of signed integers of type T.
     36 template <typename T>
     37 class IntervalSet {
     38  public:
     39   struct Interval {
     40     T begin;
     41     T end;
     42 
     43     Interval() : begin(-1), end(-1) {}
     44 
     45     Interval(T b, T e) : begin(b), end(e) {}
     46 
     47     bool operator<(const Interval &i) const {
     48       return begin < i.begin || (begin == i.begin && end > i.end);
     49     }
     50 
     51     bool operator==(const Interval &i) const {
     52       return begin == i.begin && end == i.end;
     53     }
     54 
     55     bool operator!=(const Interval &i) const {
     56       return begin != i.begin || end != i.end;
     57     }
     58 
     59     istream &Read(istream &strm) {
     60       T n;
     61       ReadType(strm, &n);
     62       begin = n;
     63       ReadType(strm, &n);
     64       end = n;
     65       return strm;
     66     }
     67 
     68     ostream &Write(ostream &strm) const {
     69       T n = begin;
     70       WriteType(strm, n);
     71       n = end;
     72       WriteType(strm, n);
     73       return strm;
     74     }
     75   };
     76 
     77   IntervalSet() : count_(-1) {}
     78 
     79   // Returns the interval set as a vector.
     80   vector<Interval> *Intervals() { return &intervals_; }
     81 
     82   const vector<Interval> *Intervals() const { return &intervals_; }
     83 
     84   bool Empty() const { return intervals_.empty(); }
     85 
     86   T Size() const { return intervals_.size(); }
     87 
     88   // Number of points in the intervals (undefined if not normalized).
     89   T Count() const { return count_; }
     90 
     91   void Clear() {
     92     intervals_.clear();
     93     count_ = 0;
     94   }
     95 
     96   // Adds an interval set to the set. The result may not be normalized.
     97   void Union(const IntervalSet<T> &iset) {
     98     const vector<Interval> *intervals = iset.Intervals();
     99     for (typename vector<Interval>::const_iterator it = intervals->begin();
    100          it != intervals->end(); ++it)
    101       intervals_.push_back(*it);
    102   }
    103 
    104   // Requires intervals be normalized.
    105   bool Member(T value) const {
    106     Interval interval(value, value);
    107     typename vector<Interval>::const_iterator lb =
    108         lower_bound(intervals_.begin(), intervals_.end(), interval);
    109     if (lb == intervals_.begin())
    110       return false;
    111     return (--lb)->end > value;
    112   }
    113 
    114   // Requires intervals be normalized.
    115   bool operator==(const IntervalSet<T>& iset) const {
    116     return *(iset.Intervals()) == intervals_;
    117   }
    118 
    119   // Requires intervals be normalized.
    120   bool operator!=(const IntervalSet<T>& iset) const {
    121     return *(iset.Intervals()) != intervals_;
    122   }
    123 
    124   bool Singleton() const {
    125     return intervals_.size() == 1 &&
    126         intervals_[0].begin + 1 == intervals_[0].end;
    127   }
    128 
    129 
    130   // Sorts; collapses overlapping and adjacent interals; sets count.
    131   void Normalize();
    132 
    133   // Intersects an interval set with the set. Requires intervals be
    134   // normalized. The result is normalized.
    135   void Intersect(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
    136 
    137   // Complements the set w.r.t [0, maxval). Requires intervals be
    138   // normalized. The result is normalized.
    139   void Complement(T maxval, IntervalSet<T> *oset) const;
    140 
    141   // Subtract an interval set from the set. Requires intervals be
    142   // normalized. The result is normalized.
    143   void Difference(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
    144 
    145   // Determines if an interval set overlaps with the set. Requires
    146   // intervals be normalized.
    147   bool Overlaps(const IntervalSet<T> &iset) const;
    148 
    149   // Determines if an interval set overlaps with the set but neither
    150   // is contained in the other. Requires intervals be normalized.
    151   bool StrictlyOverlaps(const IntervalSet<T> &iset) const;
    152 
    153   // Determines if an interval set is contained within the set. Requires
    154   // intervals be normalized.
    155   bool Contains(const IntervalSet<T> &iset) const;
    156 
    157   istream &Read(istream &strm) {
    158     ReadType(strm, &intervals_);
    159     return ReadType(strm, &count_);
    160   }
    161 
    162   ostream &Write(ostream &strm) const {
    163     WriteType(strm, intervals_);
    164     return WriteType(strm, count_);
    165   }
    166 
    167  private:
    168   vector<Interval> intervals_;
    169   T count_;
    170 };
    171 
    172 // Sorts; collapses overlapping and adjacent interavls; sets count.
    173 template <typename T>
    174 void IntervalSet<T>::Normalize() {
    175   sort(intervals_.begin(), intervals_.end());
    176 
    177   count_ = 0;
    178   T size = 0;
    179   for (T i = 0; i < intervals_.size(); ++i) {
    180     Interval &inti = intervals_[i];
    181     if (inti.begin == inti.end)
    182       continue;
    183     for (T j = i + 1; j < intervals_.size(); ++j) {
    184       Interval &intj = intervals_[j];
    185       if (intj.begin > inti.end)
    186         break;
    187       if (intj.end > inti.end)
    188         inti.end = intj.end;
    189       ++i;
    190     }
    191     count_ += inti.end - inti.begin;
    192     intervals_[size++] = inti;
    193   }
    194   intervals_.resize(size);
    195 }
    196 
    197 // Intersects an interval set with the set. Requires intervals be normalized.
    198 // The result is normalized.
    199 template <typename T>
    200 void IntervalSet<T>::Intersect(const IntervalSet<T> &iset,
    201                                IntervalSet<T> *oset) const {
    202   const vector<Interval> *iintervals = iset.Intervals();
    203   vector<Interval> *ointervals = oset->Intervals();
    204   typename vector<Interval>::const_iterator it1 = intervals_.begin();
    205   typename vector<Interval>::const_iterator it2 = iintervals->begin();
    206 
    207   ointervals->clear();
    208   oset->count_ = 0;
    209 
    210   while (it1 != intervals_.end() && it2 != iintervals->end()) {
    211     if (it1->end <= it2->begin) {
    212       ++it1;
    213     } else if (it2->end <= it1->begin) {
    214       ++it2;
    215     } else {
    216       Interval interval;
    217       interval.begin = max(it1->begin, it2->begin);
    218       interval.end = min(it1->end, it2->end);
    219       ointervals->push_back(interval);
    220       oset->count_ += interval.end - interval.begin;
    221       if (it1->end < it2->end)
    222         ++it1;
    223       else
    224         ++it2;
    225     }
    226   }
    227 }
    228 
    229 // Complements the set w.r.t [0, maxval). Requires intervals be normalized.
    230 // The result is normalized.
    231 template <typename T>
    232 void IntervalSet<T>::Complement(T maxval, IntervalSet<T> *oset) const {
    233   vector<Interval> *ointervals = oset->Intervals();
    234   ointervals->clear();
    235   oset->count_ = 0;
    236 
    237   Interval interval;
    238   interval.begin = 0;
    239   for (typename vector<Interval>::const_iterator it = intervals_.begin();
    240        it != intervals_.end();
    241        ++it) {
    242     interval.end = min(it->begin, maxval);
    243     if (interval.begin < interval.end) {
    244       ointervals->push_back(interval);
    245       oset->count_ += interval.end - interval.begin;
    246     }
    247     interval.begin = it->end;
    248   }
    249   interval.end = maxval;
    250   if (interval.begin < interval.end) {
    251     ointervals->push_back(interval);
    252     oset->count_ += interval.end - interval.begin;
    253   }
    254 }
    255 
    256 // Subtract an interval set from the set. Requires intervals be normalized.
    257 // The result is normalized.
    258 template <typename T>
    259 void IntervalSet<T>::Difference(const IntervalSet<T> &iset,
    260                                 IntervalSet<T> *oset) const {
    261   if (intervals_.empty()) {
    262     oset->Intervals()->clear();
    263     oset->count_ = 0;
    264   } else {
    265     IntervalSet<T> cset;
    266     iset.Complement(intervals_.back().end, &cset);
    267     Intersect(cset, oset);
    268   }
    269 }
    270 
    271 // Determines if an interval set overlaps with the set. Requires
    272 // intervals be normalized.
    273 template <typename T>
    274 bool IntervalSet<T>::Overlaps(const IntervalSet<T> &iset) const {
    275   const vector<Interval> *intervals = iset.Intervals();
    276   typename vector<Interval>::const_iterator it1 = intervals_.begin();
    277   typename vector<Interval>::const_iterator it2 = intervals->begin();
    278 
    279   while (it1 != intervals_.end() && it2 != intervals->end()) {
    280     if (it1->end <= it2->begin) {
    281       ++it1;
    282     } else if (it2->end <= it1->begin) {
    283       ++it2;
    284     } else {
    285       return true;
    286     }
    287   }
    288   return false;
    289 }
    290 
    291 // Determines if an interval set overlaps with the set but neither
    292 // is contained in the other. Requires intervals be normalized.
    293 template <typename T>
    294 bool IntervalSet<T>::StrictlyOverlaps(const IntervalSet<T> &iset) const {
    295   const vector<Interval> *intervals = iset.Intervals();
    296   typename vector<Interval>::const_iterator it1 = intervals_.begin();
    297   typename vector<Interval>::const_iterator it2 = intervals->begin();
    298   bool only1 = false;   // point in intervals_ but not intervals
    299   bool only2 = false;   // point in intervals but not intervals_
    300   bool overlap = false; // point in both intervals_ and intervals
    301 
    302   while (it1 != intervals_.end() && it2 != intervals->end()) {
    303     if (it1->end <= it2->begin) {  // no overlap - it1 first
    304       only1 = true;
    305       ++it1;
    306     } else if (it2->end <= it1->begin) {  // no overlap - it2 first
    307       only2 = true;
    308       ++it2;
    309     } else if (it2->begin == it1->begin && it2->end == it1->end) {  // equals
    310       overlap = true;
    311       ++it1;
    312       ++it2;
    313     } else if (it2->begin <= it1->begin && it2->end >= it1->end) {  // 1 c 2
    314       only2 = true;
    315       overlap = true;
    316       ++it1;
    317     } else if (it1->begin <= it2->begin && it1->end >= it2->end) {  // 2 c 1
    318       only1 = true;
    319       overlap = true;
    320       ++it2;
    321     } else {  // strict overlap
    322       only1 = true;
    323       only2 = true;
    324       overlap = true;
    325     }
    326     if (only1 == true && only2 == true && overlap == true)
    327       return true;
    328   }
    329   if (it1 != intervals_.end())
    330     only1 = true;
    331   if (it2 != intervals->end())
    332     only2 = true;
    333 
    334   return only1 == true && only2 == true && overlap == true;
    335 }
    336 
    337 // Determines if an interval set is contained within the set. Requires
    338 // intervals be normalized.
    339 template <typename T>
    340 bool IntervalSet<T>::Contains(const IntervalSet<T> &iset) const {
    341   if (iset.Count() > Count())
    342     return false;
    343 
    344   const vector<Interval> *intervals = iset.Intervals();
    345   typename vector<Interval>::const_iterator it1 = intervals_.begin();
    346   typename vector<Interval>::const_iterator it2 = intervals->begin();
    347 
    348   while (it1 != intervals_.end() && it2 != intervals->end()) {
    349     if (it1->end <= it2->begin) {  // no overlap - it1 first
    350       ++it1;
    351     } else if (it2->begin < it1->begin || it2->end > it1->end) {  // no C
    352       return false;
    353     } else if (it2->end == it1->end) {
    354       ++it1;
    355       ++it2;
    356     } else {
    357       ++it2;
    358     }
    359   }
    360   return it2 == intervals->end();
    361 }
    362 
    363 template <typename T>
    364 ostream &operator<<(ostream &strm, const IntervalSet<T> &s)  {
    365   typedef typename IntervalSet<T>::Interval Interval;
    366   const vector<Interval> *intervals = s.Intervals();
    367   strm << "{";
    368   for (typename vector<Interval>::const_iterator it = intervals->begin();
    369        it != intervals->end();
    370        ++it) {
    371     if (it != intervals->begin())
    372       strm << ",";
    373     strm << "[" << it->begin << "," << it->end << ")";
    374   }
    375   strm << "}";
    376   return strm;
    377 }
    378 
    379 }  // namespace fst
    380 
    381 #endif  // FST_LIB_INTERVAL_SET_H__
    382