Home | History | Annotate | Download | only in common
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "common/feature-extractor.h"
     18 
     19 #include "common/feature-types.h"
     20 #include "common/fml-parser.h"
     21 #include "util/base/integral_types.h"
     22 #include "util/base/logging.h"
     23 #include "util/gtl/stl_util.h"
     24 #include "util/strings/numbers.h"
     25 
     26 namespace libtextclassifier {
     27 namespace nlp_core {
     28 
// Out-of-class definition of the static constexpr member kNone.  Its value is
// given at the in-class declaration (not visible in this file); this
// namespace-scope definition provides the storage that pre-C++17 language
// rules require whenever the constant is odr-used.
constexpr FeatureValue GenericFeatureFunction::kNone;
     30 
     31 GenericFeatureExtractor::GenericFeatureExtractor() {}
     32 
     33 GenericFeatureExtractor::~GenericFeatureExtractor() {}
     34 
     35 bool GenericFeatureExtractor::Parse(const std::string &source) {
     36   // Parse feature specification into descriptor.
     37   FMLParser parser;
     38   if (!parser.Parse(source, mutable_descriptor())) return false;
     39 
     40   // Initialize feature extractor from descriptor.
     41   if (!InitializeFeatureFunctions()) return false;
     42   return true;
     43 }
     44 
     45 bool GenericFeatureExtractor::InitializeFeatureTypes() {
     46   // Register all feature types.
     47   GetFeatureTypes(&feature_types_);
     48   for (size_t i = 0; i < feature_types_.size(); ++i) {
     49     FeatureType *ft = feature_types_[i];
     50     ft->set_base(i);
     51 
     52     // Check for feature space overflow.
     53     double domain_size = ft->GetDomainSize();
     54     if (domain_size < 0) {
     55       TC_LOG(ERROR) << "Illegal domain size for feature " << ft->name() << ": "
     56                     << domain_size;
     57       return false;
     58     }
     59   }
     60   return true;
     61 }
     62 
     63 FeatureValue GenericFeatureExtractor::GetDomainSize() const {
     64   // Domain size of the set of features is equal to:
     65   //   [largest domain size of any feature types] * [number of feature types]
     66   FeatureValue max_feature_type_dsize = 0;
     67   for (size_t i = 0; i < feature_types_.size(); ++i) {
     68     FeatureType *ft = feature_types_[i];
     69     const FeatureValue feature_type_dsize = ft->GetDomainSize();
     70     if (feature_type_dsize > max_feature_type_dsize) {
     71       max_feature_type_dsize = feature_type_dsize;
     72     }
     73   }
     74 
     75   return max_feature_type_dsize * feature_types_.size();
     76 }
     77 
     78 std::string GenericFeatureFunction::GetParameter(
     79     const std::string &name) const {
     80   // Find named parameter in feature descriptor.
     81   for (int i = 0; i < descriptor_->parameter_size(); ++i) {
     82     if (name == descriptor_->parameter(i).name()) {
     83       return descriptor_->parameter(i).value();
     84     }
     85   }
     86   return "";
     87 }
     88 
     89 GenericFeatureFunction::GenericFeatureFunction() {}
     90 
     91 GenericFeatureFunction::~GenericFeatureFunction() { delete feature_type_; }
     92 
     93 int GenericFeatureFunction::GetIntParameter(const std::string &name,
     94                                             int default_value) const {
     95   int32 parsed_value = default_value;
     96   std::string value = GetParameter(name);
     97   if (!value.empty()) {
     98     if (!ParseInt32(value.c_str(), &parsed_value)) {
     99       // A parameter value has been specified, but it can't be parsed as an int.
    100       // We don't crash: instead, we long an error and return the default value.
    101       TC_LOG(ERROR) << "Value of param " << name << " is not an int: " << value;
    102     }
    103   }
    104   return parsed_value;
    105 }
    106 
    107 bool GenericFeatureFunction::GetBoolParameter(const std::string &name,
    108                                               bool default_value) const {
    109   std::string value = GetParameter(name);
    110   if (value.empty()) return default_value;
    111   if (value == "true") return true;
    112   if (value == "false") return false;
    113   TC_LOG(ERROR) << "Illegal value '" << value << "' for bool parameter '"
    114                 << name << "'"
    115                 << " will assume default " << default_value;
    116   return default_value;
    117 }
    118 
    119 void GenericFeatureFunction::GetFeatureTypes(
    120     std::vector<FeatureType *> *types) const {
    121   if (feature_type_ != nullptr) types->push_back(feature_type_);
    122 }
    123 
    124 FeatureType *GenericFeatureFunction::GetFeatureType() const {
    125   // If a single feature type has been registered return it.
    126   if (feature_type_ != nullptr) return feature_type_;
    127 
    128   // Get feature types for function.
    129   std::vector<FeatureType *> types;
    130   GetFeatureTypes(&types);
    131 
    132   // If there is exactly one feature type return this, else return null.
    133   if (types.size() == 1) return types[0];
    134   return nullptr;
    135 }
    136 
    137 std::string GenericFeatureFunction::name() const {
    138   std::string output;
    139   if (descriptor_->name().empty()) {
    140     if (!prefix_.empty()) {
    141       output.append(prefix_);
    142       output.append(".");
    143     }
    144     ToFML(*descriptor_, &output);
    145   } else {
    146     output = descriptor_->name();
    147   }
    148   return output;
    149 }
    150 
    151 }  // namespace nlp_core
    152 }  // namespace libtextclassifier
    153