Home | History | Annotate | Download | only in fel
      1 /*
      2  * Copyright (C) 2018 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 // Common feature types for parser components.
     18 
     19 #ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_
     20 #define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_
     21 
     22 #include <algorithm>
     23 #include <map>
     24 #include <string>
     25 #include <utility>
     26 
     27 #include "lang_id/common/lite_base/integral-types.h"
     28 #include "lang_id/common/lite_base/logging.h"
     29 #include "lang_id/common/lite_strings/str-cat.h"
     30 
     31 namespace libtextclassifier3 {
     32 namespace mobile {
     33 
     34 // TODO(djweiss) Clean this up as well.
     35 // Use the same type for feature values as is used for predicates.
     36 typedef int64 Predicate;
     37 typedef Predicate FeatureValue;
     38 
     39 // Each feature value in a feature vector has a feature type. The feature type
     40 // is used for converting feature type and value pairs to predicate values. The
     41 // feature type can also return names for feature values and calculate the size
     42 // of the feature value domain. The FeatureType class is abstract and must be
     43 // specialized for the concrete feature types.
     44 class FeatureType {
     45  public:
     46   // Initializes a feature type.
     47   explicit FeatureType(const string &name)
     48       : name_(name), base_(0),
     49         is_continuous_(name.find("continuous") != string::npos) {
     50   }
     51 
     52   virtual ~FeatureType() {}
     53 
     54   // Converts a feature value to a name.
     55   virtual string GetFeatureValueName(FeatureValue value) const = 0;
     56 
     57   // Returns the size of the feature values domain.
     58   virtual int64 GetDomainSize() const = 0;
     59 
     60   // Returns the feature type name.
     61   const string &name() const { return name_; }
     62 
     63   Predicate base() const { return base_; }
     64   void set_base(Predicate base) { base_ = base; }
     65 
     66   // Returns true iff this feature is continuous; see FloatFeatureValue.
     67   bool is_continuous() const { return is_continuous_; }
     68 
     69  private:
     70   // Feature type name.
     71   string name_;
     72 
     73   // "Base" feature value: i.e. a "slot" in a global ordering of features.
     74   Predicate base_;
     75 
     76   // See doc for is_continuous().
     77   bool is_continuous_;
     78 };
     79 
     80 // Feature type that is defined using an explicit map from FeatureValue to
     81 // string values.  This can reduce some of the boilerplate when defining
     82 // features that generate enum values.  Example usage:
     83 //
     84 //   class BeverageSizeFeature : public FeatureFunction<Beverage> {
     85 //     enum FeatureValue { SMALL, MEDIUM, LARGE };  // values for this feature
     86 //     void Init(TaskContext *context) override {
     87 //       set_feature_type(new EnumFeatureType("beverage_size",
     88 //           {{SMALL, "SMALL"}, {MEDIUM, "MEDIUM"}, {LARGE, "LARGE"}}));
     89 //     }
     90 //     [...]
     91 //   };
     92 class EnumFeatureType : public FeatureType {
     93  public:
     94   EnumFeatureType(const string &name,
     95                   const std::map<FeatureValue, string> &value_names)
     96       : FeatureType(name), value_names_(value_names) {
     97     for (const auto &pair : value_names) {
     98       SAFTM_CHECK_GE(pair.first, 0)
     99           << "Invalid feature value: " << pair.first << ", " << pair.second;
    100       domain_size_ = std::max(domain_size_, pair.first + 1);
    101     }
    102   }
    103 
    104   // Returns the feature name for a given feature value.
    105   string GetFeatureValueName(FeatureValue value) const override {
    106     auto it = value_names_.find(value);
    107     if (it == value_names_.end()) {
    108       SAFTM_LOG(ERROR) << "Invalid feature value " << value << " for "
    109                        << name();
    110       return "<INVALID>";
    111     }
    112     return it->second;
    113   }
    114 
    115   // Returns the number of possible values for this feature type. This is one
    116   // greater than the largest value in the value_names map.
    117   FeatureValue GetDomainSize() const override { return domain_size_; }
    118 
    119  protected:
    120   // Maximum possible value this feature could take.
    121   FeatureValue domain_size_ = 0;
    122 
    123   // Names of feature values.
    124   std::map<FeatureValue, string> value_names_;
    125 };
    126 
    127 // Feature type for binary features.
    128 class BinaryFeatureType : public FeatureType {
    129  public:
    130   BinaryFeatureType(const string &name, const string &off, const string &on)
    131       : FeatureType(name), off_(off), on_(on) {}
    132 
    133   // Returns the feature name for a given feature value.
    134   string GetFeatureValueName(FeatureValue value) const override {
    135     if (value == 0) return off_;
    136     if (value == 1) return on_;
    137     return "";
    138   }
    139 
    140   // Binary features always have two feature values.
    141   FeatureValue GetDomainSize() const override { return 2; }
    142 
    143  private:
    144   // Feature value names for on and off.
    145   string off_;
    146   string on_;
    147 };
    148 
    149 // Feature type for numeric features.
    150 class NumericFeatureType : public FeatureType {
    151  public:
    152   // Initializes numeric feature.
    153   NumericFeatureType(const string &name, FeatureValue size)
    154       : FeatureType(name), size_(size) {}
    155 
    156   // Returns numeric feature value.
    157   string GetFeatureValueName(FeatureValue value) const override {
    158     if (value < 0) return "";
    159     return LiteStrCat(value);
    160   }
    161 
    162   // Returns the number of feature values.
    163   FeatureValue GetDomainSize() const override { return size_; }
    164 
    165  private:
    166   // The underlying size of the numeric feature.
    167   FeatureValue size_;
    168 };
    169 
    170 // Feature type for byte features, including an "outside" value.
    171 class ByteFeatureType : public NumericFeatureType {
    172  public:
    173   explicit ByteFeatureType(const string &name)
    174       : NumericFeatureType(name, 257) {}
    175 
    176   string GetFeatureValueName(FeatureValue value) const override {
    177     if (value == 256) {
    178       return "<NULL>";
    179     }
    180     string result;
    181     result += static_cast<char>(value);
    182     return result;
    183   }
    184 };
    185 
    186 }  // namespace mobile
    187 }  // namespace libtextclassifier3
    188 
    189 #endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_
    190