1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "sparse_weight_vector.h" 18 19 #include <algorithm> 20 #include <list> 21 #include <vector> 22 #include <math.h> 23 24 using std::vector; 25 using std::list; 26 using std::max; 27 28 namespace learning_stochastic_linear { 29 30 // Max/Min permitted values of normalizer_ for preventing under/overflows. 31 static double kNormalizerMin = 1e-20; 32 static double kNormalizerMax = 1e20; 33 34 template<class Key, class Hash> 35 bool SparseWeightVector<Key, Hash>::IsValid() const { 36 if (isnan(normalizer_) || __isinff(normalizer_)) 37 return false; 38 for (Witer_const iter = w_.begin(); 39 iter != w_.end(); 40 ++iter) { 41 if (isnanf(iter->second) || __isinff(iter->second)) 42 return false; 43 } 44 return true; 45 } 46 47 template<class Key, class Hash> 48 void SparseWeightVector<Key, Hash>::AdditiveWeightUpdate( 49 const double multiplier, 50 const SparseWeightVector<Key, Hash> &w1, 51 const double additive_const) { 52 for (Witer_const iter = w1.w_.begin(); 53 iter != w1.w_.end(); 54 ++iter) { 55 w_[iter->first] += ((multiplier * iter->second) / w1.normalizer_ 56 + additive_const) * normalizer_; 57 } 58 return; 59 } 60 61 template<class Key, class Hash> 62 void SparseWeightVector<Key, Hash>::AdditiveSquaredWeightUpdate( 63 const double multiplier, 64 const SparseWeightVector<Key, Hash> &w1, 65 const double additive_const) { 66 for (Witer_const iter = w1.w_.begin(); 67 iter != w1.w_.end(); 68 ++iter) { 69 w_[iter->first] += ((multiplier * iter->second * iter->second) / 70 (w1.normalizer_ * w1.normalizer_) 71 + additive_const) * normalizer_; 72 } 73 return; 74 } 75 76 template<class Key, class Hash> 77 void SparseWeightVector<Key, Hash>::AdditiveInvSqrtWeightUpdate( 78 const double multiplier, 79 const SparseWeightVector<Key, Hash> &w1, 80 const double additive_const) { 81 for (Witer_const iter = w1.w_.begin(); 82 iter != w1.w_.end(); 83 ++iter) { 84 if(iter->second > 0.0) { 85 w_[iter->first] += ((multiplier * sqrt(w1.normalizer_)) / 86 (sqrt(iter->second)) 87 + additive_const) * normalizer_; 88 } 89 } 90 return; 91 } 92 93 template<class Key, class Hash> 94 void SparseWeightVector<Key, Hash>::AdditiveWeightUpdateBounded( 95 const double multiplier, 96 const SparseWeightVector<Key, Hash> &w1, 97 const double additive_const) { 98 double min_bound = 0; 99 double max_bound = 0; 100 for (Witer_const iter = w1.w_.begin(); 101 iter != w1.w_.end(); 102 ++iter) { 103 w_[iter->first] += ((multiplier * iter->second) / w1.normalizer_ 104 + additive_const) * normalizer_; 105 bool is_min_bounded = GetValue(wmin_, iter->first, &min_bound); 106 if (is_min_bounded) { 107 if ((w_[iter->first] / normalizer_) < min_bound) { 108 w_[iter->first] = min_bound*normalizer_; 109 continue; 110 } 111 } 112 bool is_max_bounded = GetValue(wmax_, iter->first, &max_bound); 113 if (is_max_bounded) { 114 if ((w_[iter->first] / normalizer_) > max_bound) 115 w_[iter->first] = max_bound*normalizer_; 116 } 117 } 118 return; 119 } 120 121 template<class Key, class Hash> 122 void SparseWeightVector<Key, Hash>::MultWeightUpdate( 123 const SparseWeightVector<Key, Hash> &w1) { 124 for (Witer iter = w_.begin(); 125 iter != w_.end(); 126 ++iter) { 127 iter->second *= w1.GetElement(iter->first); 128 } 129 normalizer_ *= w1.normalizer_; 130 return; 131 } 132 133 template<class Key, class Hash> 134 void SparseWeightVector<Key, Hash>::MultWeightUpdateBounded( 135 const SparseWeightVector<Key, Hash> &w1) { 136 double min_bound = 0; 137 double max_bound = 0; 138 139 normalizer_ *= w1.normalizer_; 140 for (Witer iter = w_.begin(); 141 iter != w_.end(); 142 ++iter) { 143 iter->second *= w1.GetElement(iter->first); 144 bool is_min_bounded = GetValue(wmin_, iter->first, &min_bound); 145 if (is_min_bounded) { 146 if ((iter->second / normalizer_) < min_bound) { 147 iter->second = min_bound*normalizer_; 148 continue; 149 } 150 } 151 bool is_max_bounded = GetValue(wmax_, iter->first, &max_bound); 152 if (is_max_bounded) { 153 if ((iter->second / normalizer_) > max_bound) 154 iter->second = max_bound*normalizer_; 155 } 156 } 157 return; 158 } 159 160 template<class Key, class Hash> 161 void SparseWeightVector<Key, Hash>::ResetNormalizer() { 162 for (Witer iter = w_.begin(); 163 iter != w_.end(); 164 ++iter) { 165 iter->second /= normalizer_; 166 } 167 normalizer_ = 1.0; 168 } 169 170 template<class Key, class Hash> 171 void SparseWeightVector<Key, Hash>::ReprojectToBounds() { 172 double min_bound = 0; 173 double max_bound = 0; 174 175 for (Witer iter = w_.begin(); 176 iter != w_.end(); 177 ++iter) { 178 bool is_min_bounded = GetValue(wmin_, iter->first, &min_bound); 179 if (is_min_bounded) { 180 if ((iter->second/normalizer_) < min_bound) { 181 iter->second = min_bound*normalizer_; 182 continue; 183 } 184 } 185 bool is_max_bounded = GetValue(wmax_, iter->first, &max_bound); 186 if (is_max_bounded) { 187 if ((iter->second/normalizer_) > max_bound) 188 iter->second = max_bound*normalizer_; 189 } 190 } 191 } 192 193 template<class Key, class Hash> 194 double SparseWeightVector<Key, Hash>::DotProduct( 195 const SparseWeightVector<Key, Hash> &w1) const { 196 double result = 0; 197 if (w_.size() > w1.w_.size()) { 198 for (Witer_const iter = w1.w_.begin(); 199 iter != w1.w_.end(); 200 ++iter) { 201 result += iter->second * GetElement(iter->first); 202 } 203 result /= (this->normalizer_ * w1.normalizer_); 204 } else { 205 for (Witer_const iter = w_.begin(); 206 iter != w_.end(); 207 ++iter) { 208 result += iter->second * w1.GetElement(iter->first); 209 } 210 result /= (this->normalizer_ * w1.normalizer_); 211 } 212 return result; 213 } 214 215 template<class Key, class Hash> 216 double SparseWeightVector<Key, Hash>::LxNorm(const double x) const { 217 double result = 0; 218 CHECK_GT(x, 0); 219 for (Witer_const iter = w_.begin(); 220 iter != w_.end(); 221 ++iter) { 222 result += pow(iter->second, x); 223 } 224 return (pow(result, 1.0 / x) / normalizer_); 225 } 226 227 template<class Key, class Hash> 228 double SparseWeightVector<Key, Hash>::L2Norm() const { 229 double result = 0; 230 for (Witer_const iter = w_.begin(); 231 iter != w_.end(); 232 ++iter) { 233 result += iter->second * iter->second; 234 } 235 return sqrt(result)/normalizer_; 236 } 237 238 template<class Key, class Hash> 239 double SparseWeightVector<Key, Hash>::L1Norm() const { 240 double result = 0; 241 for (Witer_const iter = w_.begin(); 242 iter != w_.end(); 243 ++iter) { 244 result += fabs(iter->second); 245 } 246 return result / normalizer_; 247 } 248 249 template<class Key, class Hash> 250 double SparseWeightVector<Key, Hash>::L0Norm( 251 const double epsilon) const { 252 double result = 0; 253 for (Witer_const iter = w_.begin(); 254 iter != w_.end(); 255 ++iter) { 256 if (fabs(iter->second / normalizer_) > epsilon) 257 ++result; 258 } 259 return result; 260 } 261 262 // Algorithm for L0 projection which takes O(n log(n)), where n is 263 // the number of non-zero elements in the vector. 264 template<class Key, class Hash> 265 void SparseWeightVector<Key, Hash>::ReprojectL0(const double l0_norm) { 266 // First calculates the order-statistics of the sparse vector 267 // and then reprojects to the L0 orthant with the requested norm. 268 CHECK_GT(l0_norm, 0); 269 uint64 req_l0_norm = static_cast<uint64>(l0_norm); 270 // Compute order statistics and the current L0 norm. 271 vector<double> abs_val_vec; 272 uint64 curr_l0_norm = 0; 273 const double epsilone = 1E-05; 274 for (Witer iter = w_.begin(); 275 iter != w_.end(); 276 ++iter) { 277 if (fabs(iter->second/normalizer_) > epsilone) { 278 abs_val_vec.push_back(fabs(iter->second/normalizer_)); 279 ++curr_l0_norm; 280 } 281 } 282 // check if a projection is necessary 283 if (curr_l0_norm < req_l0_norm) { 284 return; 285 } 286 std::nth_element(&abs_val_vec[0], 287 &abs_val_vec[curr_l0_norm - req_l0_norm], 288 &abs_val_vec[curr_l0_norm]); 289 const double theta = abs_val_vec[curr_l0_norm - req_l0_norm]; 290 // compute the final projection. 291 for (Witer iter = w_.begin(); 292 iter != w_.end(); 293 ++iter) { 294 if ((fabs(iter->second/normalizer_) - theta) < 0) { 295 iter->second = 0; 296 } 297 } 298 } 299 300 // Slow algorithm for accurate L1 projection which takes O(n log(n)), where n is 301 // the number of non-zero elements in the vector. 302 template<class Key, class Hash> 303 void SparseWeightVector<Key, Hash>::ReprojectL1(const double l1_norm) { 304 // First calculates the order-statistics of the sparse vector 305 // applies a probability simplex projection to the abs(vector) 306 // and reprojects back to the original with the appropriate sign. 307 // For ref. see "Efficient Projections into the l1-ball for Learning 308 // in High Dimensions" 309 CHECK_GT(l1_norm, 0); 310 // Compute order statistics and the current L1 norm. 311 list<double> abs_val_list; 312 double curr_l1_norm = 0; 313 for (Witer iter = w_.begin(); 314 iter != w_.end(); 315 ++iter) { 316 abs_val_list.push_back(fabs(iter->second/normalizer_)); 317 curr_l1_norm += fabs(iter->second/normalizer_); 318 } 319 // check if a projection is necessary 320 if (curr_l1_norm < l1_norm) { 321 return; 322 } 323 abs_val_list.sort(); 324 abs_val_list.reverse(); 325 // Compute projection on the probability simplex. 326 double curr_index = 1; 327 double theta = 0; 328 double cum_sum = 0; 329 for (list<double>::iterator val_iter = abs_val_list.begin(); 330 val_iter != abs_val_list.end(); 331 ++val_iter) { 332 cum_sum += *val_iter; 333 theta = (cum_sum - l1_norm)/curr_index; 334 if (((*val_iter) - theta) <= 0) { 335 break; 336 } 337 ++curr_index; 338 } 339 // compute the final projection. 340 for (Witer iter = w_.begin(); 341 iter != w_.end(); 342 ++iter) { 343 int sign_mul = iter->second > 0; 344 iter->second = max(sign_mul * normalizer_ * 345 (fabs(iter->second/normalizer_) - theta), 346 0.0); 347 } 348 } 349 350 template<class Key, class Hash> 351 void SparseWeightVector<Key, Hash>::ReprojectL2(const double l2_norm) { 352 CHECK_GT(l2_norm, 0); 353 double curr_l2_norm = L2Norm(); 354 // Check if a projection is necessary. 355 if (curr_l2_norm > l2_norm) { 356 normalizer_ *= curr_l2_norm / l2_norm; 357 } 358 } 359 360 template<class Key, class Hash> 361 int32 SparseWeightVector<Key, Hash>::Reproject(const double norm, 362 const RegularizationType r) { 363 CHECK_GT(norm, 0); 364 if (r == L0) { 365 ReprojectL0(norm); 366 } else if (r == L1) { 367 ReprojectL1(norm); 368 } else if (r == L2) { 369 ReprojectL2(norm); 370 } else { 371 // This else is just to ensure that if other RegularizationTypes are 372 // supported in the enum later which require manipulations not related 373 // to SparseWeightVector then we catch the accidental argument here. 374 ALOGE("Unsupported regularization type requested"); 375 return -1; 376 } 377 // If the normalizer gets dangerously large or small, normalize the 378 // entire vector. This stops projections from sending the vector 379 // weights and the normalizer simultaneously all very small or 380 // large, causing under/over flows. But if you hit this too often 381 // it's a sign you've chosen a bad lambda. 382 if (normalizer_ < kNormalizerMin) { 383 ALOGE("Resetting normalizer to 1.0 to prevent underflow. " 384 "Is lambda too large?"); 385 ResetNormalizer(); 386 } 387 if (normalizer_ > kNormalizerMax) { 388 ALOGE("Resetting normalizer to 1.0 to prevent overflow. " 389 "Is lambda too small?"); 390 ResetNormalizer(); 391 } 392 return 0; 393 } 394 395 template class SparseWeightVector<std::string, std::unordered_map<std::string, double> >; 396 template class SparseWeightVector<int, std::unordered_map<int, double> >; 397 template class SparseWeightVector<uint64, std::unordered_map<uint64, double> >; 398 } // namespace learning_stochastic_linear 399