Home | History | Annotate | Download | only in fst
      1 
      2 // Licensed under the Apache License, Version 2.0 (the "License");
      3 // you may not use this file except in compliance with the License.
      4 // You may obtain a copy of the License at
      5 //
      6 //     http://www.apache.org/licenses/LICENSE-2.0
      7 //
      8 // Unless required by applicable law or agreed to in writing, software
      9 // distributed under the License is distributed on an "AS IS" BASIS,
     10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     11 // See the License for the specific language governing permissions and
     12 // limitations under the License.
     13 //
     14 // Copyright 2005-2010 Google, Inc.
     15 // Author: wojciech (at) google.com (Wojciech Skut)
     16 //
     17 // \file Union-Find algorithm for dense sets of non-negative
     18 // integers. Implemented using disjoint tree forests with rank
     19 // heuristics and path compression.
     20 
     21 #ifndef __fst_union_find_inl_h__
     22 #define __fst_union_find_inl_h__
     23 
     24 #include <stack>
     25 #include <vector>
     26 using std::vector;
     27 #include <fst/types.h>
     28 
     29 namespace fst {
     30 
     31 // Union-Find algorithm for dense sets of non-negative integers
     32 // (exact type: T).
     33 template <class T>
     34 class UnionFind {
     35  public:
     36   // Ctor: creates a disjoint set forest for the range [0;max).
     37   // 'fail' is a value indicating that an element hasn't been
     38   // initialized using MakeSet(...). The upper bound of the range
     39   // can be reset (increased) using MakeSet(...).
     40   UnionFind(T max, T fail)
     41       : parent_(max, fail), rank_(max), fail_(fail) { }
     42 
     43   // Finds the representative of the set 'item' belongs to.
     44   // Performs path compression if needed.
     45   T FindSet(T item) {
     46     if (item >= parent_.size()
     47         || item == fail_
     48         || parent_[item] == fail_) return fail_;
     49 
     50     T *p = &parent_[item];
     51     for (; *p != item; item = *p, p = &parent_[item]) {
     52       exec_stack_.push(p);
     53     }
     54     for (; ! exec_stack_.empty(); exec_stack_.pop()) {
     55       *exec_stack_.top() = *p;
     56     }
     57     return *p;
     58   }
     59 
     60   // Creates the (destructive) union of the sets x and y belong to.
     61   void Union(T x, T y) {
     62     Link(FindSet(x), FindSet(y));
     63   }
     64 
     65   // Initialization of an element: creates a singleton set containing
     66   // 'item'. The range [0;max) is reset if item >= max.
     67   T MakeSet(T item) {
     68     if (item >= parent_.size()) {
     69       // New value in parent_ should be initialized to fail_
     70       size_t nitem = item > 0 ? 2 * item : 2;
     71       parent_.resize(nitem, fail_);
     72       rank_.resize(nitem);
     73     }
     74     parent_[item] = item;
     75     return item;
     76   }
     77 
     78   // Initialization of all elements starting from 0 to max - 1 to distinct sets
     79   void MakeAllSet(T max) {
     80     parent_.resize(max);
     81     for (T item = 0; item < max; ++item) {
     82       parent_[item] = item;
     83     }
     84   }
     85 
     86  private:
     87   vector<T> parent_;      // Parent nodes.
     88   vector<int> rank_;      // Rank of an element = min. depth in tree.
     89   T fail_;                // Value indicating lookup failure.
     90   stack<T*> exec_stack_;  // Used for path compression.
     91 
     92   // Links trees rooted in 'x' and 'y'.
     93   void Link(T x, T y) {
     94     if (x == y) return;
     95 
     96     if (rank_[x] > rank_[y]) {
     97       parent_[y] = x;
     98     } else {
     99       parent_[x] = y;
    100       if (rank_[x] == rank_[y]) {
    101         ++rank_[y];
    102       }
    103     }
    104   }
    105   DISALLOW_COPY_AND_ASSIGN(UnionFind);
    106 };
    107 
    108 }  // namespace fst
    109 
    110 #endif  // __fst_union_find_inl_h__
    111