Home | History | Annotate | Download | only in common
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 // Notes on thread-safety: All of the classes here are thread-compatible.  More
     18 // specifically, the registry machinery is thread-safe, as long as each thread
     19 // performs feature extraction on a different Sentence object.
     20 
     21 #ifndef LIBTEXTCLASSIFIER_COMMON_WORKSPACE_H_
     22 #define LIBTEXTCLASSIFIER_COMMON_WORKSPACE_H_
     23 
#include <stddef.h>

#include <algorithm>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "util/base/logging.h"
#include "util/base/macros.h"
     32 
     33 namespace libtextclassifier {
     34 namespace nlp_core {
     35 
     36 // A base class for shared workspaces. Derived classes implement a static member
     37 // function TypeName() which returns a human readable std::string name for the
     38 // class.
     39 class Workspace {
     40  public:
     41   // Polymorphic destructor.
     42   virtual ~Workspace() {}
     43 
     44  protected:
     45   // Create an empty workspace.
     46   Workspace() {}
     47 
     48  private:
     49   TC_DISALLOW_COPY_AND_ASSIGN(Workspace);
     50 };
     51 
     52 // Returns a new, strictly increasing int every time it is invoked.
     53 int GetFreshTypeId();
     54 
     55 // Struct to simulate typeid, but without RTTI.
     56 template <typename T>
     57 struct TypeId {
     58   static int type_id;
     59 };
     60 
     61 template <typename T>
     62 int TypeId<T>::type_id = GetFreshTypeId();
     63 
     64 // A registry that keeps track of workspaces.
     65 class WorkspaceRegistry {
     66  public:
     67   // Create an empty registry.
     68   WorkspaceRegistry() {}
     69 
     70   // Returns the index of a named workspace, adding it to the registry first
     71   // if necessary.
     72   template <class W>
     73   int Request(const std::string &name) {
     74     const int id = TypeId<W>::type_id;
     75     max_workspace_id_ = std::max(id, max_workspace_id_);
     76     workspace_types_[id] = W::TypeName();
     77     std::vector<std::string> &names = workspace_names_[id];
     78     for (int i = 0; i < names.size(); ++i) {
     79       if (names[i] == name) return i;
     80     }
     81     names.push_back(name);
     82     return names.size() - 1;
     83   }
     84 
     85   // Returns the maximum workspace id that has been registered.
     86   int MaxId() const {
     87     return max_workspace_id_;
     88   }
     89 
     90   const std::unordered_map<int, std::vector<std::string> > &WorkspaceNames()
     91       const {
     92     return workspace_names_;
     93   }
     94 
     95   // Returns a std::string describing the registered workspaces.
     96   std::string DebugString() const;
     97 
     98  private:
     99   // Workspace type names, indexed as workspace_types_[typeid].
    100   std::unordered_map<int, std::string> workspace_types_;
    101 
    102   // Workspace names, indexed as workspace_names_[typeid][workspace].
    103   std::unordered_map<int, std::vector<std::string> > workspace_names_;
    104 
    105   // The maximum workspace id that has been registered.
    106   int max_workspace_id_ = 0;
    107 
    108   TC_DISALLOW_COPY_AND_ASSIGN(WorkspaceRegistry);
    109 };
    110 
    111 // A typed collected of workspaces. The workspaces are indexed according to an
    112 // external WorkspaceRegistry. If the WorkspaceSet is const, the contents are
    113 // also immutable.
    114 class WorkspaceSet {
    115  public:
    116   ~WorkspaceSet() { Reset(WorkspaceRegistry()); }
    117 
    118   // Returns true if a workspace has been set.
    119   template <class W>
    120   bool Has(int index) const {
    121     const int id = TypeId<W>::type_id;
    122     TC_DCHECK_GE(id, 0);
    123     TC_DCHECK_LT(id, workspaces_.size());
    124     TC_DCHECK_GE(index, 0);
    125     TC_DCHECK_LT(index, workspaces_[id].size());
    126     if (id >= workspaces_.size()) return false;
    127     return workspaces_[id][index] != nullptr;
    128   }
    129 
    130   // Returns an indexed workspace; the workspace must have been set.
    131   template <class W>
    132   const W &Get(int index) const {
    133     TC_DCHECK(Has<W>(index));
    134     const int id = TypeId<W>::type_id;
    135     const Workspace *w = workspaces_[id][index];
    136     return reinterpret_cast<const W &>(*w);
    137   }
    138 
    139   // Sets an indexed workspace; this takes ownership of the workspace, which
    140   // must have been new-allocated.  It is an error to set a workspace twice.
    141   template <class W>
    142   void Set(int index, W *workspace) {
    143     const int id = TypeId<W>::type_id;
    144     TC_DCHECK_GE(id, 0);
    145     TC_DCHECK_LT(id, workspaces_.size());
    146     TC_DCHECK_GE(index, 0);
    147     TC_DCHECK_LT(index, workspaces_[id].size());
    148     TC_DCHECK(workspaces_[id][index] == nullptr);
    149     TC_DCHECK(workspace != nullptr);
    150     workspaces_[id][index] = workspace;
    151   }
    152 
    153   void Reset(const WorkspaceRegistry &registry) {
    154     // Deallocate current workspaces.
    155     for (auto &it : workspaces_) {
    156       for (size_t index = 0; index < it.size(); ++index) {
    157         delete it[index];
    158       }
    159     }
    160     workspaces_.clear();
    161     workspaces_.resize(registry.MaxId() + 1, std::vector<Workspace *>());
    162     for (auto &it : registry.WorkspaceNames()) {
    163       workspaces_[it.first].resize(it.second.size());
    164     }
    165   }
    166 
    167  private:
    168   // The set of workspaces, indexed as workspaces_[typeid][index].
    169   std::vector<std::vector<Workspace *> > workspaces_;
    170 };
    171 
    172 // A workspace that wraps around a single int.
    173 class SingletonIntWorkspace : public Workspace {
    174  public:
    175   // Default-initializes the int value.
    176   SingletonIntWorkspace() {}
    177 
    178   // Initializes the int with the given value.
    179   explicit SingletonIntWorkspace(int value) : value_(value) {}
    180 
    181   // Returns the name of this type of workspace.
    182   static std::string TypeName() { return "SingletonInt"; }
    183 
    184   // Returns the int value.
    185   int get() const { return value_; }
    186 
    187   // Sets the int value.
    188   void set(int value) { value_ = value; }
    189 
    190  private:
    191   // The enclosed int.
    192   int value_ = 0;
    193 };
    194 
    195 // A workspace that wraps around a vector of int.
    196 class VectorIntWorkspace : public Workspace {
    197  public:
    198   // Creates a vector of the given size.
    199   explicit VectorIntWorkspace(int size);
    200 
    201   // Creates a vector initialized with the given array.
    202   explicit VectorIntWorkspace(const std::vector<int> &elements);
    203 
    204   // Creates a vector of the given size, with each element initialized to the
    205   // given value.
    206   VectorIntWorkspace(int size, int value);
    207 
    208   // Returns the name of this type of workspace.
    209   static std::string TypeName();
    210 
    211   // Returns the i'th element.
    212   int element(int i) const { return elements_[i]; }
    213 
    214   // Sets the i'th element.
    215   void set_element(int i, int value) { elements_[i] = value; }
    216 
    217  private:
    218   // The enclosed vector.
    219   std::vector<int> elements_;
    220 };
    221 
    222 // A workspace that wraps around a vector of vector of int.
    223 class VectorVectorIntWorkspace : public Workspace {
    224  public:
    225   // Creates a vector of empty vectors of the given size.
    226   explicit VectorVectorIntWorkspace(int size);
    227 
    228   // Returns the name of this type of workspace.
    229   static std::string TypeName();
    230 
    231   // Returns the i'th vector of elements.
    232   const std::vector<int> &elements(int i) const { return elements_[i]; }
    233 
    234   // Mutable access to the i'th vector of elements.
    235   std::vector<int> *mutable_elements(int i) { return &(elements_[i]); }
    236 
    237  private:
    238   // The enclosed vector of vector of elements.
    239   std::vector<std::vector<int> > elements_;
    240 };
    241 
    242 }  // namespace nlp_core
    243 }  // namespace libtextclassifier
    244 
    245 #endif  // LIBTEXTCLASSIFIER_COMMON_WORKSPACE_H_
    246