1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "common/feature-extractor.h" 18 19 #include "common/feature-types.h" 20 #include "common/fml-parser.h" 21 #include "util/base/integral_types.h" 22 #include "util/base/logging.h" 23 #include "util/gtl/stl_util.h" 24 #include "util/strings/numbers.h" 25 26 namespace libtextclassifier { 27 namespace nlp_core { 28 29 constexpr FeatureValue GenericFeatureFunction::kNone; 30 31 GenericFeatureExtractor::GenericFeatureExtractor() {} 32 33 GenericFeatureExtractor::~GenericFeatureExtractor() {} 34 35 bool GenericFeatureExtractor::Parse(const std::string &source) { 36 // Parse feature specification into descriptor. 37 FMLParser parser; 38 if (!parser.Parse(source, mutable_descriptor())) return false; 39 40 // Initialize feature extractor from descriptor. 41 if (!InitializeFeatureFunctions()) return false; 42 return true; 43 } 44 45 bool GenericFeatureExtractor::InitializeFeatureTypes() { 46 // Register all feature types. 47 GetFeatureTypes(&feature_types_); 48 for (size_t i = 0; i < feature_types_.size(); ++i) { 49 FeatureType *ft = feature_types_[i]; 50 ft->set_base(i); 51 52 // Check for feature space overflow. 53 double domain_size = ft->GetDomainSize(); 54 if (domain_size < 0) { 55 TC_LOG(ERROR) << "Illegal domain size for feature " << ft->name() << ": " 56 << domain_size; 57 return false; 58 } 59 } 60 return true; 61 } 62 63 FeatureValue GenericFeatureExtractor::GetDomainSize() const { 64 // Domain size of the set of features is equal to: 65 // [largest domain size of any feature types] * [number of feature types] 66 FeatureValue max_feature_type_dsize = 0; 67 for (size_t i = 0; i < feature_types_.size(); ++i) { 68 FeatureType *ft = feature_types_[i]; 69 const FeatureValue feature_type_dsize = ft->GetDomainSize(); 70 if (feature_type_dsize > max_feature_type_dsize) { 71 max_feature_type_dsize = feature_type_dsize; 72 } 73 } 74 75 return max_feature_type_dsize * feature_types_.size(); 76 } 77 78 std::string GenericFeatureFunction::GetParameter( 79 const std::string &name) const { 80 // Find named parameter in feature descriptor. 81 for (int i = 0; i < descriptor_->parameter_size(); ++i) { 82 if (name == descriptor_->parameter(i).name()) { 83 return descriptor_->parameter(i).value(); 84 } 85 } 86 return ""; 87 } 88 89 GenericFeatureFunction::GenericFeatureFunction() {} 90 91 GenericFeatureFunction::~GenericFeatureFunction() { delete feature_type_; } 92 93 int GenericFeatureFunction::GetIntParameter(const std::string &name, 94 int default_value) const { 95 int32 parsed_value = default_value; 96 std::string value = GetParameter(name); 97 if (!value.empty()) { 98 if (!ParseInt32(value.c_str(), &parsed_value)) { 99 // A parameter value has been specified, but it can't be parsed as an int. 100 // We don't crash: instead, we long an error and return the default value. 101 TC_LOG(ERROR) << "Value of param " << name << " is not an int: " << value; 102 } 103 } 104 return parsed_value; 105 } 106 107 bool GenericFeatureFunction::GetBoolParameter(const std::string &name, 108 bool default_value) const { 109 std::string value = GetParameter(name); 110 if (value.empty()) return default_value; 111 if (value == "true") return true; 112 if (value == "false") return false; 113 TC_LOG(ERROR) << "Illegal value '" << value << "' for bool parameter '" 114 << name << "'" 115 << " will assume default " << default_value; 116 return default_value; 117 } 118 119 void GenericFeatureFunction::GetFeatureTypes( 120 std::vector<FeatureType *> *types) const { 121 if (feature_type_ != nullptr) types->push_back(feature_type_); 122 } 123 124 FeatureType *GenericFeatureFunction::GetFeatureType() const { 125 // If a single feature type has been registered return it. 126 if (feature_type_ != nullptr) return feature_type_; 127 128 // Get feature types for function. 129 std::vector<FeatureType *> types; 130 GetFeatureTypes(&types); 131 132 // If there is exactly one feature type return this, else return null. 133 if (types.size() == 1) return types[0]; 134 return nullptr; 135 } 136 137 std::string GenericFeatureFunction::name() const { 138 std::string output; 139 if (descriptor_->name().empty()) { 140 if (!prefix_.empty()) { 141 output.append(prefix_); 142 output.append("."); 143 } 144 ToFML(*descriptor_, &output); 145 } else { 146 output = descriptor_->name(); 147 } 148 return output; 149 } 150 151 } // namespace nlp_core 152 } // namespace libtextclassifier 153