/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "sparse_weight_vector.h"

#include <math.h>

#include <algorithm>
#include <list>
#include <string>
#include <unordered_map>
#include <vector>

using std::list;
using std::max;
using std::vector;

namespace learning_stochastic_linear {

// Max/min permitted values of normalizer_ for preventing under/overflows.
static const double kNormalizerMin = 1e-20;
static const double kNormalizerMax = 1e20;

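// Representation note: each coefficient is stored in a lazily scaled form,
// so the effective value of an entry is w_[key] / normalizer_. Keeping the
// scale factor separate lets whole-vector rescalings (e.g. the L2
// projection below) run in O(1) instead of touching every entry.
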
template<class Key, class Hash>
bool SparseWeightVector<Key, Hash>::IsValid() const {
  if (isnan(normalizer_) || isinf(normalizer_))
    return false;
  for (Witer_const iter = w_.begin();
       iter != w_.end();
       ++iter) {
    // The weights are doubles, so use the double-precision classification
    // functions rather than the float-only isnanf/__isinff variants.
    if (isnan(iter->second) || isinf(iter->second))
      return false;
  }
  return true;
}

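// Performs w += multiplier * w1 + additive_const over the non-zero entries
// of w1, with both vectors read and written in their effective
// (normalizer-adjusted) values.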
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::AdditiveWeightUpdate(
    const double multiplier,
    const SparseWeightVector<Key, Hash> &w1,
    const double additive_const) {
  for (Witer_const iter = w1.w_.begin();
      iter != w1.w_.end();
      ++iter) {
    w_[iter->first] += ((multiplier * iter->second) / w1.normalizer_
                        + additive_const) * normalizer_;
  }
  return;
}

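// Performs w += multiplier * w1^2 + additive_const over the non-zero
// entries of w1 (w1's values are squared after removing its normalizer).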
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::AdditiveSquaredWeightUpdate(
    const double multiplier,
    const SparseWeightVector<Key, Hash> &w1,
    const double additive_const) {
  for (Witer_const iter = w1.w_.begin();
      iter != w1.w_.end();
      ++iter) {
    w_[iter->first] += ((multiplier * iter->second * iter->second) /
                          (w1.normalizer_ * w1.normalizer_)
                        + additive_const) * normalizer_;
  }
  return;
}

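// Performs w += multiplier / sqrt(w1) + additive_const over the strictly
// positive entries of w1; non-positive entries are skipped, which avoids a
// division by sqrt(0) or a NaN from sqrt of a negative value.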
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::AdditiveInvSqrtWeightUpdate(
    const double multiplier,
    const SparseWeightVector<Key, Hash> &w1,
    const double additive_const) {
  for (Witer_const iter = w1.w_.begin();
      iter != w1.w_.end();
      ++iter) {
    if (iter->second > 0.0) {
      w_[iter->first] += ((multiplier * sqrt(w1.normalizer_)) /
                          (sqrt(iter->second))
                          + additive_const) * normalizer_;
    }
  }
  return;
}

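// Same additive update as AdditiveWeightUpdate, but every touched entry is
// then clamped into its per-key [wmin_, wmax_] range when a bound exists.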
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::AdditiveWeightUpdateBounded(
    const double multiplier,
    const SparseWeightVector<Key, Hash> &w1,
    const double additive_const) {
  double min_bound = 0;
  double max_bound = 0;
  for (Witer_const iter = w1.w_.begin();
      iter != w1.w_.end();
      ++iter) {
    w_[iter->first] += ((multiplier * iter->second) / w1.normalizer_
                        + additive_const) * normalizer_;
    bool is_min_bounded = GetValue(wmin_, iter->first, &min_bound);
    if (is_min_bounded) {
      if ((w_[iter->first] / normalizer_) < min_bound) {
        w_[iter->first] = min_bound * normalizer_;
        continue;
      }
    }
    bool is_max_bounded = GetValue(wmax_, iter->first, &max_bound);
    if (is_max_bounded) {
      if ((w_[iter->first] / normalizer_) > max_bound)
        w_[iter->first] = max_bound * normalizer_;
    }
  }
  return;
}

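// Elementwise multiply: w *= w1, with the normalizers multiplied as well.
// Only keys already present in this vector need to be visited, because a
// key missing from either operand contributes a zero product.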
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::MultWeightUpdate(
    const SparseWeightVector<Key, Hash> &w1) {
  for (Witer iter = w_.begin();
      iter != w_.end();
      ++iter) {
    iter->second *= w1.GetElement(iter->first);
  }
  normalizer_ *= w1.normalizer_;
  return;
}

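// Elementwise multiply followed by the same per-key clamping as
// AdditiveWeightUpdateBounded.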
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::MultWeightUpdateBounded(
    const SparseWeightVector<Key, Hash> &w1) {
  double min_bound = 0;
  double max_bound = 0;

  normalizer_ *= w1.normalizer_;
  for (Witer iter = w_.begin();
      iter != w_.end();
      ++iter) {
    iter->second *= w1.GetElement(iter->first);
    bool is_min_bounded = GetValue(wmin_, iter->first, &min_bound);
    if (is_min_bounded) {
      if ((iter->second / normalizer_) < min_bound) {
        iter->second = min_bound * normalizer_;
        continue;
      }
    }
    bool is_max_bounded = GetValue(wmax_, iter->first, &max_bound);
    if (is_max_bounded) {
      if ((iter->second / normalizer_) > max_bound)
        iter->second = max_bound * normalizer_;
    }
  }
  return;
}

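// Divides every stored weight by normalizer_ and resets the scale factor to
// 1.0, leaving all effective values unchanged.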
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::ResetNormalizer() {
  for (Witer iter = w_.begin();
       iter != w_.end();
       ++iter) {
    iter->second /= normalizer_;
  }
  normalizer_ = 1.0;
}

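// Clamps each effective weight into its per-key [wmin_, wmax_] range where
// such a bound exists; normalizer_ is left untouched.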
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::ReprojectToBounds() {
  double min_bound = 0;
  double max_bound = 0;

  for (Witer iter = w_.begin();
       iter != w_.end();
       ++iter) {
    bool is_min_bounded = GetValue(wmin_, iter->first, &min_bound);
    if (is_min_bounded) {
      if ((iter->second / normalizer_) < min_bound) {
        iter->second = min_bound * normalizer_;
        continue;
      }
    }
    bool is_max_bounded = GetValue(wmax_, iter->first, &max_bound);
    if (is_max_bounded) {
      if ((iter->second / normalizer_) > max_bound)
        iter->second = max_bound * normalizer_;
    }
  }
}

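// Dot product of two sparse vectors. Iterates over the smaller of the two
// maps and looks each key up in the larger one, then divides out both
// normalizers once at the end.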
template<class Key, class Hash>
double SparseWeightVector<Key, Hash>::DotProduct(
    const SparseWeightVector<Key, Hash> &w1) const {
  double result = 0;
  if (w_.size() > w1.w_.size()) {
    for (Witer_const iter = w1.w_.begin();
        iter != w1.w_.end();
        ++iter) {
      result += iter->second * GetElement(iter->first);
    }
  } else {
    for (Witer_const iter = w_.begin();
        iter != w_.end();
        ++iter) {
      result += iter->second * w1.GetElement(iter->first);
    }
  }
  result /= (this->normalizer_ * w1.normalizer_);
  return result;
}

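// Lx norm of the effective vector: (sum_i |w_i / normalizer_|^x)^(1/x),
// computed by factoring normalizer_ out of the sum. Requires x > 0.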
template<class Key, class Hash>
double SparseWeightVector<Key, Hash>::LxNorm(const double x) const {
  double result = 0;
  CHECK_GT(x, 0);
  for (Witer_const iter = w_.begin();
      iter != w_.end();
      ++iter) {
    // Take the absolute value first: a negative base with a fractional
    // exponent would make pow() return NaN.
    result += pow(fabs(iter->second), x);
  }
  return (pow(result, 1.0 / x) / normalizer_);
}

template<class Key, class Hash>
double SparseWeightVector<Key, Hash>::L2Norm() const {
  double result = 0;
  for (Witer_const iter = w_.begin();
      iter != w_.end();
      ++iter) {
    result += iter->second * iter->second;
  }
  return sqrt(result) / normalizer_;
}

template<class Key, class Hash>
double SparseWeightVector<Key, Hash>::L1Norm() const {
  double result = 0;
  for (Witer_const iter = w_.begin();
      iter != w_.end();
      ++iter) {
    result += fabs(iter->second);
  }
  return result / normalizer_;
}

template<class Key, class Hash>
double SparseWeightVector<Key, Hash>::L0Norm(
    const double epsilon) const {
  double result = 0;
  for (Witer_const iter = w_.begin();
      iter != w_.end();
      ++iter) {
    if (fabs(iter->second / normalizer_) > epsilon)
      ++result;
  }
  return result;
}

// Algorithm for L0 projection which takes O(n log(n)), where n is the
// number of non-zero elements in the vector. It first computes the order
// statistics of the sparse vector and then reprojects to the L0 orthant
// with the requested norm.
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::ReprojectL0(const double l0_norm) {
  CHECK_GT(l0_norm, 0);
  uint64 req_l0_norm = static_cast<uint64>(l0_norm);
  // Compute order statistics and the current L0 norm.
  vector<double> abs_val_vec;
  uint64 curr_l0_norm = 0;
  const double epsilon = 1E-05;
  for (Witer iter = w_.begin();
      iter != w_.end();
      ++iter) {
    if (fabs(iter->second / normalizer_) > epsilon) {
      abs_val_vec.push_back(fabs(iter->second / normalizer_));
      ++curr_l0_norm;
    }
  }
  // Check if a projection is necessary.
  if (curr_l0_norm < req_l0_norm) {
    return;
  }
  // Use iterators rather than &abs_val_vec[curr_l0_norm], which would take
  // the address of an element one past the end of the vector.
  std::nth_element(abs_val_vec.begin(),
                   abs_val_vec.begin() + (curr_l0_norm - req_l0_norm),
                   abs_val_vec.end());
  const double theta = abs_val_vec[curr_l0_norm - req_l0_norm];
  // Compute the final projection: zero out everything below the threshold,
  // keeping only the req_l0_norm largest magnitudes.
  for (Witer iter = w_.begin();
      iter != w_.end();
      ++iter) {
    if ((fabs(iter->second / normalizer_) - theta) < 0) {
      iter->second = 0;
    }
  }
}

// Slow algorithm for accurate L1 projection which takes O(n log(n)), where
// n is the number of non-zero elements in the vector. It first computes the
// order statistics of the sparse vector, applies a probability-simplex
// projection to abs(vector), and then restores the original signs.
// For reference see "Efficient Projections onto the l1-Ball for Learning
// in High Dimensions" (Duchi et al., ICML 2008).
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::ReprojectL1(const double l1_norm) {
  CHECK_GT(l1_norm, 0);
  // Compute order statistics and the current L1 norm.
  list<double> abs_val_list;
  double curr_l1_norm = 0;
  for (Witer iter = w_.begin();
      iter != w_.end();
      ++iter) {
    abs_val_list.push_back(fabs(iter->second / normalizer_));
    curr_l1_norm += fabs(iter->second / normalizer_);
  }
  // Check if a projection is necessary.
  if (curr_l1_norm < l1_norm) {
    return;
  }
  // Sort the magnitudes in decreasing order.
  abs_val_list.sort();
  abs_val_list.reverse();
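  // With sorted magnitudes v_1 >= v_2 >= ..., the simplex-projection
  // threshold from Duchi et al. is theta = (sum_{i <= rho} v_i - l1_norm)
  // / rho, where rho is the largest index at which
  // v_rho - (sum_{i <= rho} v_i - l1_norm) / rho > 0 still holds.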
  // Compute the projection onto the probability simplex.
  double curr_index = 1;
  double theta = 0;
  double cum_sum = 0;
  for (list<double>::iterator val_iter = abs_val_list.begin();
       val_iter != abs_val_list.end();
       ++val_iter) {
    cum_sum += *val_iter;
    const double candidate = (cum_sum - l1_norm) / curr_index;
    // Stop at the first index where the condition fails; theta keeps the
    // value computed at rho, the last index where it held.
    if (((*val_iter) - candidate) <= 0) {
      break;
    }
    theta = candidate;
    ++curr_index;
  }
  // Compute the final projection: soft-threshold each weight by theta,
  // preserving its sign.
  for (Witer iter = w_.begin();
      iter != w_.end();
      ++iter) {
    const double sign_mul = (iter->second > 0) ? 1.0 : -1.0;
    iter->second = sign_mul * normalizer_ *
                   max(fabs(iter->second / normalizer_) - theta, 0.0);
  }
}

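// Projects onto the L2 ball of radius l2_norm. Because the effective vector
// is w_ / normalizer_, scaling normalizer_ up by curr_l2_norm / l2_norm
// rescales every effective weight in O(1) without touching the map.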
template<class Key, class Hash>
void SparseWeightVector<Key, Hash>::ReprojectL2(const double l2_norm) {
  CHECK_GT(l2_norm, 0);
  double curr_l2_norm = L2Norm();
  // Check if a projection is necessary.
  if (curr_l2_norm > l2_norm) {
    normalizer_ *= curr_l2_norm / l2_norm;
  }
}

template<class Key, class Hash>
int32 SparseWeightVector<Key, Hash>::Reproject(const double norm,
                                               const RegularizationType r) {
  CHECK_GT(norm, 0);
  if (r == L0) {
    ReprojectL0(norm);
  } else if (r == L1) {
    ReprojectL1(norm);
  } else if (r == L2) {
    ReprojectL2(norm);
  } else {
    // If other RegularizationTypes are added to the enum later that require
    // manipulations not implemented by SparseWeightVector, the stray
    // argument is caught here instead of being silently ignored.
    ALOGE("Unsupported regularization type requested");
    return -1;
  }
  // If the normalizer gets dangerously large or small, normalize the
  // entire vector. This stops projections from driving the vector
  // weights and the normalizer simultaneously to very small or very
  // large values, causing under/overflows. But if you hit this too often,
  // it's a sign you've chosen a bad lambda.
  if (normalizer_ < kNormalizerMin) {
    ALOGE("Resetting normalizer to 1.0 to prevent underflow. "
          "Is lambda too large?");
    ResetNormalizer();
  }
  if (normalizer_ > kNormalizerMax) {
    ALOGE("Resetting normalizer to 1.0 to prevent overflow. "
          "Is lambda too small?");
    ResetNormalizer();
  }
  return 0;
}

template class SparseWeightVector<std::string,
                                  std::unordered_map<std::string, double> >;
template class SparseWeightVector<int, std::unordered_map<int, double> >;
template class SparseWeightVector<uint64, std::unordered_map<uint64, double> >;

}  // namespace learning_stochastic_linear