1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "utils/math/softmax.h" 18 19 #include <limits> 20 21 #include "utils/base/logging.h" 22 #include "utils/math/fastexp.h" 23 24 namespace libtextclassifier3 { 25 26 float ComputeSoftmaxProbability(const std::vector<float> &scores, int label) { 27 if ((label < 0) || (label >= scores.size())) { 28 TC3_LOG(ERROR) << "label " << label << " outside range " 29 << "[0, " << scores.size() << ")"; 30 return 0.0f; 31 } 32 33 // Standard softmax formula for label's probability is 34 // 35 // exp(scores[label]) / sum_i exp(scores[i]) 36 // 37 // We compute the mathematically equivalent 38 // 39 // 1 / (1 + sum_{i != label} exp(scores[i] - scores[label])) 40 // 41 // which saves two calls to exp(). 42 const float label_score = scores[label]; 43 float denominator = 1.0f; // Contribution of i == label. 44 for (int i = 0; i < scores.size(); ++i) { 45 if (i == label) continue; 46 const float delta_score = scores[i] - label_score; 47 48 // TODO(salcianu): one can optimize the test below, to avoid any float 49 // operation: extract exponent (via bit mask + shift) and check it's >= 4. 50 if (fabs(delta_score) >= 16.0f) { 51 if (delta_score > 0.0f) { 52 // If delta_score >= 16, the denominator (e^delta_score + other positive 53 // terms) is very big and its inverse can be approximated with 0. 54 return 0.0f; 55 } else { 56 // If delta_score <= -16, then e^delta_score < 1.2e-7. Even if we have 57 // 1000 such labels i, their sum is < 1.2e-4 (which gets summed with 58 // 1.0f for i == label). Hence, we can approximate each such label with 59 // 0 and skip the call to VeryFastExp and the update to denominator. 60 continue; 61 } 62 } 63 64 // At this point, delta_score is in (-16.0, 16.0). For such values, vfexp 65 // works fine: no under/overflows (we have tests for that in fastexp_test). 66 // Also, even for 1000 labels, denominator will not overflow. 67 denominator += VeryFastExp(delta_score); 68 } 69 return 1.0f / denominator; 70 } 71 72 std::vector<float> ComputeSoftmax(const std::vector<float> &scores) { 73 return ComputeSoftmax(scores.data(), scores.size()); 74 } 75 76 std::vector<float> ComputeSoftmax(const float *scores, int scores_size) { 77 std::vector<float> softmax; 78 std::vector<float> exp_scores; 79 exp_scores.reserve(scores_size); 80 softmax.reserve(scores_size); 81 82 // Find max value in "scores" vector and rescale to avoid overflows. 83 float max = std::numeric_limits<float>::min(); 84 for (int i = 0; i < scores_size; ++i) { 85 const float score = scores[i]; 86 if (score > max) max = score; 87 } 88 float denominator = 0; 89 for (int i = 0; i < scores_size; ++i) { 90 const float score = scores[i]; 91 // See comments above in ComputeSoftmaxProbability for the reasoning behind 92 // this approximation. 93 const float exp_score = score - max < -16.0f ? 0 : VeryFastExp(score - max); 94 exp_scores.push_back(exp_score); 95 denominator += exp_score; 96 } 97 98 for (int i = 0; i < scores_size; ++i) { 99 softmax.push_back(exp_scores[i] / denominator); 100 } 101 return softmax; 102 } 103 104 } // namespace libtextclassifier3 105