Home | History | Annotate | Download | only in pdt
      1 // pdt.h
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Copyright 2005-2010 Google, Inc.
     16 // Author: riley (at) google.com (Michael Riley)
     17 //
     18 // \file
     19 // Common classes for PDT expansion/traversal.
     20 
     21 #ifndef FST_EXTENSIONS_PDT_PDT_H__
     22 #define FST_EXTENSIONS_PDT_PDT_H__
     23 
     24 #include <tr1/unordered_map>
     25 using std::tr1::unordered_map;
     26 using std::tr1::unordered_multimap;
     27 #include <map>
     28 #include <set>
     29 
     30 #include <fst/compat.h>
     31 #include <fst/state-table.h>
     32 #include <fst/fst.h>
     33 
     34 namespace fst {
     35 
     36 // Provides bijection between parenthesis stacks and signed integral
     37 // stack IDs. Each stack ID is unique to each distinct stack.  The
     38 // open-close parenthesis label pairs are passed in 'parens'.
     39 template <typename K, typename L>
     40 class PdtStack {
     41  public:
     42   typedef K StackId;
     43   typedef L Label;
     44 
     45   // The stacks are stored in a tree. The nodes are stored in vector
     46   // 'nodes_'. Each node represents the top of some stack and is
     47   // ID'ed by its position in the vector. Its parent node represents
     48   // the stack with the top 'popped' and its children are stored in
     49   // 'child_map_' accessed by stack_id and label. The paren_id is
     50   // the position in 'parens' of the parenthesis for that node.
     51   struct StackNode {
     52     StackId parent_id;
     53     size_t paren_id;
     54 
     55     StackNode(StackId p, size_t i) : parent_id(p), paren_id(i) {}
     56   };
     57 
     58   PdtStack(const vector<pair<Label, Label> > &parens)
     59       : parens_(parens), min_paren_(kNoLabel), max_paren_(kNoLabel) {
     60     for (size_t i = 0; i < parens.size(); ++i) {
     61       const pair<Label, Label>  &p = parens[i];
     62       paren_map_[p.first] = i;
     63       paren_map_[p.second] = i;
     64 
     65       if (min_paren_ == kNoLabel || p.first < min_paren_)
     66         min_paren_ = p.first;
     67       if (p.second < min_paren_)
     68         min_paren_ = p.second;
     69 
     70       if (max_paren_ == kNoLabel || p.first > max_paren_)
     71         max_paren_ = p.first;
     72       if (p.second > max_paren_)
     73         max_paren_ = p.second;
     74     }
     75     nodes_.push_back(StackNode(-1, -1));  // Tree root.
     76   }
     77 
     78   // Returns stack ID given the current stack ID (0 if empty) and
     79   // label read. 'Pushes' onto a stack if the label is an open
     80   // parenthesis, returning the new stack ID. 'Pops' the stack if the
     81   // label is a close parenthesis that matches the top of the stack,
     82   // returning the parent stack ID. Returns -1 if label is an
     83   // unmatched close parenthesis. Otherwise, returns the current stack
     84   // ID.
     85   StackId Find(StackId stack_id, Label label) {
     86     if (min_paren_ == kNoLabel || label < min_paren_ || label > max_paren_)
     87       return stack_id;                       // Non-paren.
     88 
     89     typename unordered_map<Label, size_t>::const_iterator pit
     90         = paren_map_.find(label);
     91     if (pit == paren_map_.end())             // Non-paren.
     92       return stack_id;
     93     ssize_t paren_id = pit->second;
     94 
     95     if (label == parens_[paren_id].first) {  // Open paren.
     96       StackId &child_id = child_map_[make_pair(stack_id, label)];
     97       if (child_id == 0) {                   // Child not found, push label.
     98         child_id = nodes_.size();
     99         nodes_.push_back(StackNode(stack_id, paren_id));
    100       }
    101       return child_id;
    102     }
    103 
    104     const StackNode &node = nodes_[stack_id];
    105     if (paren_id == node.paren_id)           // Matching close paren.
    106       return node.parent_id;
    107 
    108     return -1;                               // Non-matching close paren.
    109   }
    110 
    111   // Returns the stack ID obtained by "popping" the label at the top
    112   // of the current stack ID.
    113   StackId Pop(StackId stack_id) const {
    114     return nodes_[stack_id].parent_id;
    115   }
    116 
    117   // Returns the paren ID at the top of the stack for 'stack_id'
    118   ssize_t Top(StackId stack_id) const {
    119     return nodes_[stack_id].paren_id;
    120   }
    121 
    122   ssize_t ParenId(Label label) const {
    123     typename unordered_map<Label, size_t>::const_iterator pit
    124         = paren_map_.find(label);
    125     if (pit == paren_map_.end())  // Non-paren.
    126       return -1;
    127     return pit->second;
    128   }
    129 
    130  private:
    131   struct ChildHash {
    132     size_t operator()(const pair<StackId, Label> &p) const {
    133       return p.first + p.second * kPrime;
    134     }
    135   };
    136 
    137   static const size_t kPrime;
    138 
    139   vector<pair<Label, Label> > parens_;
    140   vector<StackNode> nodes_;
    141   unordered_map<Label, size_t> paren_map_;
    142   unordered_map<pair<StackId, Label>,
    143            StackId, ChildHash> child_map_;   // Child of stack node wrt label
    144   Label min_paren_;                          // For faster paren. check
    145   Label max_paren_;                          // For faster paren. check
    146 };
    147 
    148 template <typename T, typename L>
    149 const size_t PdtStack<T, L>::kPrime = 7853;
    150 
    151 
    152 // State tuple for PDT expansion
    153 template <typename S, typename K>
    154 struct PdtStateTuple {
    155   typedef S StateId;
    156   typedef K StackId;
    157 
    158   StateId state_id;
    159   StackId stack_id;
    160 
    161   PdtStateTuple()
    162       : state_id(kNoStateId), stack_id(-1) {}
    163 
    164   PdtStateTuple(StateId fs, StackId ss)
    165       : state_id(fs), stack_id(ss) {}
    166 };
    167 
    168 // Equality of PDT state tuples.
    169 template <typename S, typename K>
    170 inline bool operator==(const PdtStateTuple<S, K>& x,
    171                        const PdtStateTuple<S, K>& y) {
    172   if (&x == &y)
    173     return true;
    174   return x.state_id == y.state_id && x.stack_id == y.stack_id;
    175 }
    176 
    177 
    178 // Hash function object for PDT state tuples
    179 template <class T>
    180 class PdtStateHash {
    181  public:
    182   size_t operator()(const T &tuple) const {
    183     return tuple.state_id + tuple.stack_id * kPrime;
    184   }
    185 
    186  private:
    187   static const size_t kPrime;
    188 };
    189 
    190 template <typename T>
    191 const size_t PdtStateHash<T>::kPrime = 7853;
    192 
    193 
    194 // Tuple to PDT state bijection.
    195 template <class S, class K>
    196 class PdtStateTable
    197     : public CompactHashStateTable<PdtStateTuple<S, K>,
    198                                    PdtStateHash<PdtStateTuple<S, K> > > {
    199  public:
    200   typedef S StateId;
    201   typedef K StackId;
    202 
    203   PdtStateTable() {}
    204 
    205   PdtStateTable(const PdtStateTable<S, K> &table) {}
    206 
    207  private:
    208   void operator=(const PdtStateTable<S, K> &table);  // disallow
    209 };
    210 
    211 }  // namespace fst
    212 
    213 #endif  // FST_EXTENSIONS_PDT_PDT_H__
    214