1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Stochastic Linear Ranking algorithms. 18 // This class will implement a set of incremental algorithms for ranking tasks 19 // They support both L1 and L2 regularizations. 20 21 22 #ifndef LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_ 23 #define LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_ 24 25 #include <sys/types.h> 26 27 #include <cmath> 28 #include <string> 29 #include <unordered_map> 30 31 #include "cutils/log.h" 32 #include "common_defs.h" 33 #include "learning_rate_controller-inl.h" 34 #include "sparse_weight_vector.h" 35 36 namespace learning_stochastic_linear { 37 38 // NOTE: This Stochastic Linear Ranker supports only the following update types: 39 // SL: Stochastic Linear 40 // CS: Constraint Satisfaction 41 template<class Key = std::string, class Hash = std::unordered_map<std::string, double> > 42 class StochasticLinearRanker { 43 public: 44 // initialize lambda_ and constraint to a meaningful default. Will give 45 // equal weight to the error and regularizer. 46 StochasticLinearRanker() { 47 iteration_num_ = 0; 48 lambda_ = 1.0; 49 learning_rate_controller_.SetLambda(lambda_); 50 mini_batch_size_ = 1; 51 learning_rate_controller_.SetMiniBatchSize(mini_batch_size_); 52 adaptation_mode_ = INV_LINEAR; 53 learning_rate_controller_.SetAdaptationMode(adaptation_mode_); 54 update_type_ = SL; 55 regularization_type_ = L2; 56 kernel_type_ = LINEAR; 57 kernel_param_ = 1.0; 58 kernel_gain_ = 1.0; 59 kernel_bias_ = 0.0; 60 rank_loss_type_ = PAIRWISE; 61 acceptence_probability_ = 0.1; 62 mini_batch_counter_ = 0; 63 gradient_l0_norm_ = -1; 64 norm_constraint_ = 1.0; 65 } 66 67 ~StochasticLinearRanker() {} 68 // Getters and setters 69 double GetIterationNumber() const { 70 return iteration_num_; 71 } 72 double GetNormContraint() const { 73 return norm_constraint_; 74 } 75 RegularizationType GetRegularizationType() const { 76 return regularization_type_; 77 } 78 double GetLambda() const { 79 return lambda_; 80 } 81 uint64 GetMiniBatchSize() const { 82 return mini_batch_size_; 83 } 84 int32 GetGradientL0Norm() const { 85 return gradient_l0_norm_; 86 } 87 UpdateType GetUpdateType() const { 88 return update_type_; 89 } 90 AdaptationMode GetAdaptationMode() const { 91 return adaptation_mode_; 92 } 93 KernelType GetKernelType() const { 94 return kernel_type_; 95 } 96 // This function returns the basic kernel parameter. In case of 97 // polynomial kernel, it implies the degree of the polynomial. In case of 98 // RBF kernel, it implies the sigma parameter. In case of linear kernel, 99 // it is not used. 100 double GetKernelParam() const { 101 return kernel_param_; 102 } 103 double GetKernelGain() const { 104 return kernel_gain_;; 105 } 106 double GetKernelBias() const { 107 return kernel_bias_; 108 } 109 RankLossType GetRankLossType() const { 110 return rank_loss_type_; 111 } 112 double GetAcceptanceProbability() const { 113 return acceptence_probability_; 114 } 115 void SetIterationNumber(uint64 num) { 116 iteration_num_=num; 117 } 118 void SetNormConstraint(const double norm) { 119 norm_constraint_ = norm; 120 } 121 void SetRegularizationType(const RegularizationType r) { 122 regularization_type_ = r; 123 } 124 void SetLambda(double l) { 125 lambda_ = l; 126 learning_rate_controller_.SetLambda(l); 127 } 128 void SetMiniBatchSize(const uint64 msize) { 129 mini_batch_size_ = msize; 130 learning_rate_controller_.SetMiniBatchSize(msize); 131 } 132 void SetAdaptationMode(AdaptationMode m) { 133 adaptation_mode_ = m; 134 learning_rate_controller_.SetAdaptationMode(m); 135 } 136 void SetKernelType(KernelType k ) { 137 kernel_type_ = k; 138 } 139 // This function sets the basic kernel parameter. In case of 140 // polynomial kernel, it implies the degree of the polynomial. In case of 141 // RBF kernel, it implies the sigma parameter. In case of linear kernel, 142 // it is not used. 143 void SetKernelParam(double param) { 144 kernel_param_ = param; 145 } 146 // This function sets the kernel gain. NOTE: in most use cases, gain should 147 // be set to 1.0. 148 void SetKernelGain(double gain) { 149 kernel_gain_ = gain; 150 } 151 // This function sets the kernel bias. NOTE: in most use cases, bias should 152 // be set to 0.0. 153 void SetKernelBias(double bias) { 154 kernel_bias_ = bias; 155 } 156 void SetUpdateType(UpdateType u) { 157 update_type_ = u; 158 } 159 void SetRankLossType(RankLossType r) { 160 rank_loss_type_ = r; 161 } 162 void SetAcceptanceProbability(double p) { 163 acceptence_probability_ = p; 164 } 165 void SetGradientL0Norm(const int32 gradient_l0_norm) { 166 gradient_l0_norm_ = gradient_l0_norm; 167 } 168 // Load an existing model 169 void LoadWeights(const SparseWeightVector<Key, Hash> &model) { 170 weight_.LoadWeightVector(model); 171 } 172 // Save current model 173 void SaveWeights(SparseWeightVector<Key, Hash> *model) { 174 model->LoadWeightVector(weight_); 175 } 176 // Scoring 177 double ScoreSample(const SparseWeightVector<Key, Hash> &sample) { 178 const double dot = weight_.DotProduct(sample); 179 double w_square; 180 double s_square; 181 switch (kernel_type_) { 182 case LINEAR: 183 return dot; 184 case POLY: 185 return pow(kernel_gain_ * dot + kernel_bias_, kernel_param_); 186 case RBF: 187 w_square = weight_.L2Norm(); 188 s_square = sample.L2Norm(); 189 return exp(-1 * kernel_param_ * (w_square + s_square - 2 * dot)); 190 default: 191 ALOGE("unsupported kernel: %d", kernel_type_); 192 } 193 return -1; 194 } 195 // Learning Functions 196 // Return values: 197 // 1 :full update went through 198 // 2 :partial update went through (for SL only) 199 // 0 :no update necessary. 200 // -1:error. 201 int UpdateClassifier(const SparseWeightVector<Key, Hash> &positive, 202 const SparseWeightVector<Key, Hash> &negative); 203 204 private: 205 SparseWeightVector<Key, Hash> weight_; 206 double norm_constraint_; 207 double lambda_; 208 RegularizationType regularization_type_; 209 AdaptationMode adaptation_mode_; 210 UpdateType update_type_; 211 RankLossType rank_loss_type_; 212 KernelType kernel_type_; 213 // Kernel gain and bias are typically multiplicative and additive factors to 214 // the dot product while calculating the kernel function. Kernel param is 215 // kernel-specific. In case of polynomial kernel, it is the degree of the 216 // polynomial. 217 double kernel_param_; 218 double kernel_gain_; 219 double kernel_bias_; 220 double acceptence_probability_; 221 SparseWeightVector<Key, Hash> current_negative_; 222 LearningRateController learning_rate_controller_; 223 uint64 iteration_num_; 224 // We average out gradient updates for mini_batch_size_ samples 225 // before performing an iteration of the algorithm. 226 uint64 mini_batch_counter_; 227 uint64 mini_batch_size_; 228 // Specifies the number of non-zero entries allowed in a gradient. 229 // Default is -1 which means we take the gradient as given by data without 230 // adding any new constraints. positive number is treated as an L0 constraint 231 int32 gradient_l0_norm_; 232 // Sub-Gradient Updates 233 // Pure Sub-Gradient update without any reprojection 234 // Note that a form of L2 regularization is built into this 235 void UpdateSubGradient(const SparseWeightVector<Key, Hash> &positive, 236 const SparseWeightVector<Key, Hash> &negative, 237 const double learning_rate, 238 const double positive_score, 239 const double negative_score, 240 const int32 gradient_l0_norm); 241 242 }; 243 } // namespace learning_stochastic_linear 244 #endif // LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_ 245