Home | History | Annotate | Download | only in fst
      1 // partition.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: johans (at) google.com (Johan Schalkwyk)
     17 //
     18 // \file Functions and classes to create a partition of states
     19 //
     20 
     21 #ifndef FST_LIB_PARTITION_H__
     22 #define FST_LIB_PARTITION_H__
     23 
     24 #include <vector>
     25 using std::vector;
     26 #include <algorithm>
     27 
     28 
     29 #include <fst/queue.h>
     30 
     31 
     32 
     33 namespace fst {
     34 
     35 template <typename T> class PartitionIterator;
     36 
     37 // \class Partition
     38 // \brief Defines a partitioning of states. Typically used to represent
     39 //        equivalence classes for Fst operations like minimization.
     40 //
     41 template <typename T>
     42 class Partition {
     43   friend class PartitionIterator<T>;
     44 
     45   struct Element {
     46    Element() : value(0), next(0), prev(0) {}
     47    Element(T v) : value(v), next(0), prev(0) {}
     48 
     49    T        value;
     50    Element* next;
     51    Element* prev;
     52   };
     53 
     54  public:
     55   Partition() {}
     56 
     57   Partition(T num_states) {
     58     Initialize(num_states);
     59   }
     60 
     61   ~Partition() {
     62     for (size_t i = 0; i < elements_.size(); ++i)
     63       delete elements_[i];
     64   }
     65 
     66   // Create an empty partition for num_states. At initialization time
     67   // all elements are not assigned to a class (i.e class_index = -1).
     68   // Initialize just creates num_states of elements. All element
     69   // operations are then done by simply disconnecting the element from
     70   // it current class and placing it at the head of the next class.
     71   void Initialize(size_t num_states) {
     72     for (size_t i = 0; i < elements_.size(); ++i)
     73       delete elements_[i];
     74     elements_.clear();
     75     classes_.clear();
     76     class_index_.clear();
     77 
     78     elements_.resize(num_states);
     79     class_index_.resize(num_states, -1);
     80     class_size_.reserve(num_states);
     81     for (size_t i = 0; i < num_states; ++i)
     82       elements_[i] = new Element(i);
     83     num_states_ = num_states;
     84   }
     85 
     86   // Add a class, resize classes_ and class_size_ resource by 1.
     87   size_t AddClass() {
     88     size_t num_classes = classes_.size();
     89     classes_.resize(num_classes + 1, 0);
     90     class_size_.resize(num_classes + 1, 0);
     91     class_split_.resize(num_classes + 1, 0);
     92     split_size_.resize(num_classes + 1, 0);
     93     return num_classes;
     94   }
     95 
     96   void AllocateClasses(T num_classes) {
     97     size_t n = classes_.size() + num_classes;
     98     classes_.resize(n, 0);
     99     class_size_.resize(n, 0);
    100     class_split_.resize(n, 0);
    101     split_size_.resize(n, 0);
    102   }
    103 
    104   // Add element_id to class_id. The Add method is used to initialize
    105   // partition. Once elements have been added to a class, you need to
    106   // use the Move() method move an element from once class to another.
    107   void Add(T element_id, T class_id) {
    108     Element* element = elements_[element_id];
    109 
    110     if (classes_[class_id])
    111       classes_[class_id]->prev = element;
    112     element->next = classes_[class_id];
    113     element->prev = 0;
    114     classes_[class_id] = element;
    115 
    116     class_index_[element_id] = class_id;
    117     class_size_[class_id]++;
    118   }
    119 
    120   // Move and element_id to class_id. Disconnects (removes) element
    121   // from it current class and
    122   void Move(T element_id, T class_id) {
    123     T old_class_id = class_index_[element_id];
    124 
    125     Element* element = elements_[element_id];
    126     if (element->next) element->next->prev = element->prev;
    127     if (element->prev) element->prev->next = element->next;
    128     else               classes_[old_class_id] = element->next;
    129 
    130     Add(element_id, class_id);
    131     class_size_[old_class_id]--;
    132   }
    133 
    134   // split class on the element_id
    135   void SplitOn(T element_id) {
    136     T class_id = class_index_[element_id];
    137     if (class_size_[class_id] == 1) return;
    138 
    139     // first time class is split
    140     if (split_size_[class_id] == 0)
    141       visited_classes_.push_back(class_id);
    142 
    143     // increment size of split (set of element at head of chain)
    144     split_size_[class_id]++;
    145 
    146     // update split point
    147     if (class_split_[class_id] == 0)
    148       class_split_[class_id] = classes_[class_id];
    149     if (class_split_[class_id] == elements_[element_id])
    150       class_split_[class_id] = elements_[element_id]->next;
    151 
    152     // move to head of chain in same class
    153     Move(element_id, class_id);
    154   }
    155 
    156   // Finalize class_id, split if required, and update class_splits,
    157   // class indices of the newly created class. Returns the new_class id
    158   // or -1 if no new class was created.
    159   T SplitRefine(T class_id) {
    160     // only split if necessary
    161     if (class_size_[class_id] == split_size_[class_id]) {
    162       class_split_[class_id] = 0;
    163       split_size_[class_id] = 0;
    164       return -1;
    165     } else {
    166 
    167       T new_class = AddClass();
    168       size_t remainder = class_size_[class_id] - split_size_[class_id];
    169       if (remainder < split_size_[class_id]) {  // add smaller
    170         Element* split_el   = class_split_[class_id];
    171         classes_[new_class] = split_el;
    172         class_size_[class_id] = split_size_[class_id];
    173         class_size_[new_class] = remainder;
    174         split_el->prev->next = 0;
    175         split_el->prev = 0;
    176       } else {
    177         Element* split_el   = class_split_[class_id];
    178         classes_[new_class] = classes_[class_id];
    179         class_size_[class_id] = remainder;
    180         class_size_[new_class] = split_size_[class_id];
    181         split_el->prev->next = 0;
    182         split_el->prev = 0;
    183         classes_[class_id] = split_el;
    184       }
    185 
    186       // update class index for element in new class
    187       for (Element* el = classes_[new_class]; el; el = el->next)
    188         class_index_[el->value] = new_class;
    189 
    190       class_split_[class_id] = 0;
    191       split_size_[class_id] = 0;
    192 
    193       return new_class;
    194     }
    195   }
    196 
    197   // Once all states have been processed for a particular class C, we
    198   // can finalize the split. FinalizeSplit() will update each block in the
    199   // partition, create new once and update the queue of active classes
    200   // that require further refinement.
    201   template <class Queue>
    202   void FinalizeSplit(Queue* L) {
    203     for (size_t i = 0; i < visited_classes_.size(); ++i) {
    204       T new_class = SplitRefine(visited_classes_[i]);
    205       if (new_class != -1 && L)
    206         L->Enqueue(new_class);
    207     }
    208     visited_classes_.clear();
    209   }
    210 
    211 
    212   const T class_id(T element_id) const {
    213     return class_index_[element_id];
    214   }
    215 
    216   const vector<T>& class_sizes() const {
    217     return class_size_;
    218   }
    219 
    220   const size_t class_size(T class_id)  const {
    221     return class_size_[class_id];
    222   }
    223 
    224   const T num_classes() const {
    225     return classes_.size();
    226   }
    227 
    228 
    229  private:
    230   int num_states_;
    231 
    232   // container of all elements (owner of ptrs)
    233   vector<Element*> elements_;
    234 
    235   // linked list of elements belonging to class
    236   vector<Element*> classes_;
    237 
    238   // pointer to split point for each class
    239   vector<Element*> class_split_;
    240 
    241   // class index of element
    242   vector<T> class_index_;
    243 
    244   // class sizes
    245   vector<T> class_size_;
    246 
    247   // size of split for each class
    248   vector<T> split_size_;
    249 
    250   // set of visited classes to be used in split refine
    251   vector<T> visited_classes_;
    252 };
    253 
    254 
    255 // iterate over members of a class in a partition
    256 template <typename T>
    257 class PartitionIterator {
    258   typedef typename Partition<T>::Element Element;
    259  public:
    260   PartitionIterator(const Partition<T>& partition, T class_id)
    261       : p_(partition),
    262         element_(p_.classes_[class_id]),
    263         class_id_(class_id) {}
    264 
    265   bool Done() {
    266     return (element_ == 0);
    267   }
    268 
    269   const T Value() {
    270     return (element_->value);
    271   }
    272 
    273   void Next() {
    274     element_ = element_->next;
    275   }
    276 
    277   void Reset() {
    278     element_ = p_.classes_[class_id_];
    279   }
    280 
    281  private:
    282   const Partition<T>& p_;
    283 
    284   const Element* element_;
    285 
    286   T class_id_;
    287 };
    288 }  // namespace fst
    289 
    290 #endif  // FST_LIB_PARTITION_H__
    291