Home | History | Annotate | Download | only in lib
      1 // partition.h
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 //
     16 // \file Functions and classes to create a partition of states
     17 //
     18 
     19 #ifndef FST_LIB_PARTITION_H__
     20 #define FST_LIB_PARTITION_H__
     21 
     22 #include <algorithm>
     23 #include <vector>
     24 
     25 #include "fst/lib/queue.h"
     26 
     27 
     28 namespace fst {
     29 
     30 template <typename T> class PartitionIterator;
     31 
     32 // \class Partition
     33 // \brief Defines a partitioning of states. Typically used to represent
     34 //        equivalence classes for Fst operations like minimization.
     35 //
     36 template <typename T>
     37 class Partition {
     38   friend class PartitionIterator<T>;
     39 
     40   struct Element {
     41    Element() : value(0), next(0), prev(0) {}
     42    Element(T v) : value(v), next(0), prev(0) {}
     43 
     44    T        value;
     45    Element* next;
     46    Element* prev;
     47   };
     48 
     49  public:
     50   Partition() {}
     51 
     52   Partition(T num_states) {
     53     Initialize(num_states);
     54   }
     55 
     56   ~Partition() {
     57     for (size_t i = 0; i < elements_.size(); ++i)
     58       delete elements_[i];
     59   }
     60 
     61   // Create an empty partition for num_states. At initialization time
     62   // all elements are not assigned to a class (i.e class_index = -1).
     63   // Initialize just creates num_states of elements. All element
     64   // operations are then done by simply disconnecting the element from
     65   // it current class and placing it at the head of the next class.
     66   void Initialize(size_t num_states) {
     67     for (size_t i = 0; i < elements_.size(); ++i)
     68       delete elements_[i];
     69     elements_.clear();
     70     classes_.clear();
     71     class_index_.clear();
     72 
     73     elements_.resize(num_states);
     74     class_index_.resize(num_states, -1);
     75     class_size_.reserve(num_states);
     76     for (size_t i = 0; i < num_states; ++i)
     77       elements_[i] = new Element(i);
     78     num_states_ = num_states;
     79   }
     80 
     81   // Add a class, resize classes_ and class_size_ resource by 1.
     82   size_t AddClass() {
     83     size_t num_classes = classes_.size();
     84     classes_.resize(num_classes + 1, 0);
     85     class_size_.resize(num_classes + 1, 0);
     86     class_split_.resize(num_classes + 1, 0);
     87     split_size_.resize(num_classes + 1, 0);
     88     return num_classes;
     89   }
     90 
     91   void AllocateClasses(T num_classes) {
     92     size_t n = classes_.size() + num_classes;
     93     classes_.resize(n, 0);
     94     class_size_.resize(n, 0);
     95     class_split_.resize(n, 0);
     96     split_size_.resize(n, 0);
     97   }
     98 
     99   // Add element_id to class_id. The Add method is used to initialize
    100   // partition. Once elements have been added to a class, you need to
    101   // use the Move() method move an element from once class to another.
    102   void Add(T element_id, T class_id) {
    103     Element* element = elements_[element_id];
    104 
    105     if (classes_[class_id])
    106       classes_[class_id]->prev = element;
    107     element->next = classes_[class_id];
    108     element->prev = 0;
    109     classes_[class_id] = element;
    110 
    111     class_index_[element_id] = class_id;
    112     class_size_[class_id]++;
    113   }
    114 
    115   // Move and element_id to class_id. Disconnects (removes) element
    116   // from it current class and
    117   void Move(T element_id, T class_id) {
    118     T old_class_id = class_index_[element_id];
    119 
    120     Element* element = elements_[element_id];
    121     if (element->next) element->next->prev = element->prev;
    122     if (element->prev) element->prev->next = element->next;
    123     else               classes_[old_class_id] = element->next;
    124 
    125     Add(element_id, class_id);
    126     class_size_[old_class_id]--;
    127   }
    128 
    129   // split class on the element_id
    130   void SplitOn(T element_id) {
    131     T class_id = class_index_[element_id];
    132     if (class_size_[class_id] == 1) return;
    133 
    134     // first time class is split
    135     if (split_size_[class_id] == 0)
    136       visited_classes_.push_back(class_id);
    137 
    138     // increment size of split (set of element at head of chain)
    139     split_size_[class_id]++;
    140 
    141     // update split point
    142     if (class_split_[class_id] == 0)
    143       class_split_[class_id] = classes_[class_id];
    144     if (class_split_[class_id] == elements_[element_id])
    145       class_split_[class_id] = elements_[element_id]->next;
    146 
    147     // move to head of chain in same class
    148     Move(element_id, class_id);
    149   }
    150 
    151   // Finalize class_id, split if required, and update class_splits,
    152   // class indices of the newly created class. Returns the new_class id
    153   // or -1 if no new class was created.
    154   T SplitRefine(T class_id) {
    155     // only split if necessary
    156     if (class_size_[class_id] == split_size_[class_id]) {
    157       class_split_[class_id] = 0;
    158       split_size_[class_id] = 0;
    159       return -1;
    160     } else {
    161 
    162       T new_class = AddClass();
    163       size_t remainder = class_size_[class_id] - split_size_[class_id];
    164       if (remainder < (size_t)split_size_[class_id]) {  // add smaller
    165         Element* split_el   = class_split_[class_id];
    166         classes_[new_class] = split_el;
    167         class_size_[class_id] = split_size_[class_id];
    168         class_size_[new_class] = remainder;
    169         split_el->prev->next = 0;
    170         split_el->prev = 0;
    171       } else {
    172         Element* split_el   = class_split_[class_id];
    173         classes_[new_class] = classes_[class_id];
    174         class_size_[class_id] = remainder;
    175         class_size_[new_class] = split_size_[class_id];
    176         split_el->prev->next = 0;
    177         split_el->prev = 0;
    178         classes_[class_id] = split_el;
    179       }
    180 
    181       // update class index for element in new class
    182       for (Element* el = classes_[new_class]; el; el = el->next)
    183         class_index_[el->value] = new_class;
    184 
    185       class_split_[class_id] = 0;
    186       split_size_[class_id] = 0;
    187 
    188       return new_class;
    189     }
    190   }
    191 
    192   // Once all states have been processed for a particular class C, we
    193   // can finalize the split. FinalizeSplit() will update each block in the
    194   // partition, create new once and update the queue of active classes
    195   // that require further refinement.
    196   template <class Queue>
    197   void FinalizeSplit(Queue* L) {
    198     for (size_t i = 0; i < visited_classes_.size(); ++i) {
    199       T new_class = SplitRefine(visited_classes_[i]);
    200       if (new_class != -1 && L)
    201         L->Enqueue(new_class);
    202     }
    203     visited_classes_.clear();
    204   }
    205 
    206 
    207   const T class_id(T element_id) const {
    208     return class_index_[element_id];
    209   }
    210 
    211   const vector<T>& class_sizes() const {
    212     return class_size_;
    213   }
    214 
    215   const size_t class_size(T class_id)  const {
    216     return class_size_[class_id];
    217   }
    218 
    219   const T num_classes() const {
    220     return classes_.size();
    221   }
    222 
    223 
    224  private:
    225   int num_states_;
    226 
    227   // container of all elements (owner of ptrs)
    228   vector<Element*> elements_;
    229 
    230   // linked list of elements belonging to class
    231   vector<Element*> classes_;
    232 
    233   // pointer to split point for each class
    234   vector<Element*> class_split_;
    235 
    236   // class index of element
    237   vector<T> class_index_;
    238 
    239   // class sizes
    240   vector<T> class_size_;
    241 
    242   // size of split for each class
    243   vector<T> split_size_;
    244 
    245   // set of visited classes to be used in split refine
    246   vector<T> visited_classes_;
    247 };
    248 
    249 
    250 // iterate over members of a class in a partition
    251 template <typename T>
    252 class PartitionIterator {
    253   typedef typename Partition<T>::Element Element;
    254  public:
    255   PartitionIterator(const Partition<T>& partition, T class_id)
    256       : p_(partition),
    257         element_(p_.classes_[class_id]),
    258         class_id_(class_id) {}
    259 
    260   bool Done() {
    261     return (element_ == 0);
    262   }
    263 
    264   const T Value() {
    265     return (element_->value);
    266   }
    267 
    268   void Next() {
    269     element_ = element_->next;
    270   }
    271 
    272   void Reset() {
    273     element_ = p_.classes_[class_id_];
    274   }
    275 
    276  private:
    277   const Partition<T>& p_;
    278 
    279   const Element* element_;
    280 
    281   T class_id_;
    282 };
    283 }
    284 
    285 #endif  // FST_LIB_PARTITION_H__
    286