1 // partition.h 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // 16 // \file Functions and classes to create a partition of states 17 // 18 19 #ifndef FST_LIB_PARTITION_H__ 20 #define FST_LIB_PARTITION_H__ 21 22 #include <algorithm> 23 #include <vector> 24 25 #include "fst/lib/queue.h" 26 27 28 namespace fst { 29 30 template <typename T> class PartitionIterator; 31 32 // \class Partition 33 // \brief Defines a partitioning of states. Typically used to represent 34 // equivalence classes for Fst operations like minimization. 35 // 36 template <typename T> 37 class Partition { 38 friend class PartitionIterator<T>; 39 40 struct Element { 41 Element() : value(0), next(0), prev(0) {} 42 Element(T v) : value(v), next(0), prev(0) {} 43 44 T value; 45 Element* next; 46 Element* prev; 47 }; 48 49 public: 50 Partition() {} 51 52 Partition(T num_states) { 53 Initialize(num_states); 54 } 55 56 ~Partition() { 57 for (size_t i = 0; i < elements_.size(); ++i) 58 delete elements_[i]; 59 } 60 61 // Create an empty partition for num_states. At initialization time 62 // all elements are not assigned to a class (i.e class_index = -1). 63 // Initialize just creates num_states of elements. All element 64 // operations are then done by simply disconnecting the element from 65 // it current class and placing it at the head of the next class. 66 void Initialize(size_t num_states) { 67 for (size_t i = 0; i < elements_.size(); ++i) 68 delete elements_[i]; 69 elements_.clear(); 70 classes_.clear(); 71 class_index_.clear(); 72 73 elements_.resize(num_states); 74 class_index_.resize(num_states, -1); 75 class_size_.reserve(num_states); 76 for (size_t i = 0; i < num_states; ++i) 77 elements_[i] = new Element(i); 78 num_states_ = num_states; 79 } 80 81 // Add a class, resize classes_ and class_size_ resource by 1. 82 size_t AddClass() { 83 size_t num_classes = classes_.size(); 84 classes_.resize(num_classes + 1, 0); 85 class_size_.resize(num_classes + 1, 0); 86 class_split_.resize(num_classes + 1, 0); 87 split_size_.resize(num_classes + 1, 0); 88 return num_classes; 89 } 90 91 void AllocateClasses(T num_classes) { 92 size_t n = classes_.size() + num_classes; 93 classes_.resize(n, 0); 94 class_size_.resize(n, 0); 95 class_split_.resize(n, 0); 96 split_size_.resize(n, 0); 97 } 98 99 // Add element_id to class_id. The Add method is used to initialize 100 // partition. Once elements have been added to a class, you need to 101 // use the Move() method move an element from once class to another. 102 void Add(T element_id, T class_id) { 103 Element* element = elements_[element_id]; 104 105 if (classes_[class_id]) 106 classes_[class_id]->prev = element; 107 element->next = classes_[class_id]; 108 element->prev = 0; 109 classes_[class_id] = element; 110 111 class_index_[element_id] = class_id; 112 class_size_[class_id]++; 113 } 114 115 // Move and element_id to class_id. Disconnects (removes) element 116 // from it current class and 117 void Move(T element_id, T class_id) { 118 T old_class_id = class_index_[element_id]; 119 120 Element* element = elements_[element_id]; 121 if (element->next) element->next->prev = element->prev; 122 if (element->prev) element->prev->next = element->next; 123 else classes_[old_class_id] = element->next; 124 125 Add(element_id, class_id); 126 class_size_[old_class_id]--; 127 } 128 129 // split class on the element_id 130 void SplitOn(T element_id) { 131 T class_id = class_index_[element_id]; 132 if (class_size_[class_id] == 1) return; 133 134 // first time class is split 135 if (split_size_[class_id] == 0) 136 visited_classes_.push_back(class_id); 137 138 // increment size of split (set of element at head of chain) 139 split_size_[class_id]++; 140 141 // update split point 142 if (class_split_[class_id] == 0) 143 class_split_[class_id] = classes_[class_id]; 144 if (class_split_[class_id] == elements_[element_id]) 145 class_split_[class_id] = elements_[element_id]->next; 146 147 // move to head of chain in same class 148 Move(element_id, class_id); 149 } 150 151 // Finalize class_id, split if required, and update class_splits, 152 // class indices of the newly created class. Returns the new_class id 153 // or -1 if no new class was created. 154 T SplitRefine(T class_id) { 155 // only split if necessary 156 if (class_size_[class_id] == split_size_[class_id]) { 157 class_split_[class_id] = 0; 158 split_size_[class_id] = 0; 159 return -1; 160 } else { 161 162 T new_class = AddClass(); 163 size_t remainder = class_size_[class_id] - split_size_[class_id]; 164 if (remainder < (size_t)split_size_[class_id]) { // add smaller 165 Element* split_el = class_split_[class_id]; 166 classes_[new_class] = split_el; 167 class_size_[class_id] = split_size_[class_id]; 168 class_size_[new_class] = remainder; 169 split_el->prev->next = 0; 170 split_el->prev = 0; 171 } else { 172 Element* split_el = class_split_[class_id]; 173 classes_[new_class] = classes_[class_id]; 174 class_size_[class_id] = remainder; 175 class_size_[new_class] = split_size_[class_id]; 176 split_el->prev->next = 0; 177 split_el->prev = 0; 178 classes_[class_id] = split_el; 179 } 180 181 // update class index for element in new class 182 for (Element* el = classes_[new_class]; el; el = el->next) 183 class_index_[el->value] = new_class; 184 185 class_split_[class_id] = 0; 186 split_size_[class_id] = 0; 187 188 return new_class; 189 } 190 } 191 192 // Once all states have been processed for a particular class C, we 193 // can finalize the split. FinalizeSplit() will update each block in the 194 // partition, create new once and update the queue of active classes 195 // that require further refinement. 196 template <class Queue> 197 void FinalizeSplit(Queue* L) { 198 for (size_t i = 0; i < visited_classes_.size(); ++i) { 199 T new_class = SplitRefine(visited_classes_[i]); 200 if (new_class != -1 && L) 201 L->Enqueue(new_class); 202 } 203 visited_classes_.clear(); 204 } 205 206 207 const T class_id(T element_id) const { 208 return class_index_[element_id]; 209 } 210 211 const vector<T>& class_sizes() const { 212 return class_size_; 213 } 214 215 const size_t class_size(T class_id) const { 216 return class_size_[class_id]; 217 } 218 219 const T num_classes() const { 220 return classes_.size(); 221 } 222 223 224 private: 225 int num_states_; 226 227 // container of all elements (owner of ptrs) 228 vector<Element*> elements_; 229 230 // linked list of elements belonging to class 231 vector<Element*> classes_; 232 233 // pointer to split point for each class 234 vector<Element*> class_split_; 235 236 // class index of element 237 vector<T> class_index_; 238 239 // class sizes 240 vector<T> class_size_; 241 242 // size of split for each class 243 vector<T> split_size_; 244 245 // set of visited classes to be used in split refine 246 vector<T> visited_classes_; 247 }; 248 249 250 // iterate over members of a class in a partition 251 template <typename T> 252 class PartitionIterator { 253 typedef typename Partition<T>::Element Element; 254 public: 255 PartitionIterator(const Partition<T>& partition, T class_id) 256 : p_(partition), 257 element_(p_.classes_[class_id]), 258 class_id_(class_id) {} 259 260 bool Done() { 261 return (element_ == 0); 262 } 263 264 const T Value() { 265 return (element_->value); 266 } 267 268 void Next() { 269 element_ = element_->next; 270 } 271 272 void Reset() { 273 element_ = p_.classes_[class_id_]; 274 } 275 276 private: 277 const Partition<T>& p_; 278 279 const Element* element_; 280 281 T class_id_; 282 }; 283 } 284 285 #endif // FST_LIB_PARTITION_H__ 286