/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/lib/histogram/histogram.h"

#include <float.h>
#include <math.h>

#include <algorithm>
#include <vector>

#include "tensorflow/core/framework/summary.pb.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {
namespace histogram {

static std::vector<double>* InitDefaultBucketsInner() {
  std::vector<double> buckets;
  std::vector<double> neg_buckets;
  // Make buckets whose range grows by 10% starting at 1.0e-12 up to 1.0e20
  double v = 1.0e-12;
  while (v < 1.0e20) {
    buckets.push_back(v);
    neg_buckets.push_back(-v);
    v *= 1.1;
  }
  buckets.push_back(DBL_MAX);
  neg_buckets.push_back(-DBL_MAX);
  std::reverse(neg_buckets.begin(), neg_buckets.end());
  std::vector<double>* result = new std::vector<double>;
  result->insert(result->end(), neg_buckets.begin(), neg_buckets.end());
  result->push_back(0.0);
  result->insert(result->end(), buckets.begin(), buckets.end());
  return result;
}

static gtl::ArraySlice<double> InitDefaultBuckets() {
  static std::vector<double>* default_bucket_limits =
      InitDefaultBucketsInner();
  return *default_bucket_limits;
}

Histogram::Histogram() : bucket_limits_(InitDefaultBuckets()) { Clear(); }

// Create a histogram with a custom set of bucket limits,
// specified in "custom_buckets[0..custom_buckets.size()-1]"
Histogram::Histogram(gtl::ArraySlice<double> custom_bucket_limits)
    : custom_bucket_limits_(custom_bucket_limits.begin(),
                            custom_bucket_limits.end()),
      bucket_limits_(custom_bucket_limits_) {
#ifndef NDEBUG
  DCHECK_GT(bucket_limits_.size(), size_t{0});
  // Verify that the bucket boundaries are strictly increasing
  for (size_t i = 1; i < bucket_limits_.size(); i++) {
    DCHECK_GT(bucket_limits_[i], bucket_limits_[i - 1]);
  }
#endif
  Clear();
}

bool Histogram::DecodeFromProto(const HistogramProto& proto) {
  if ((proto.bucket_size() != proto.bucket_limit_size()) ||
      (proto.bucket_size() == 0)) {
    return false;
  }
  min_ = proto.min();
  max_ = proto.max();
  num_ = proto.num();
  sum_ = proto.sum();
  sum_squares_ = proto.sum_squares();
  custom_bucket_limits_.clear();
  custom_bucket_limits_.insert(custom_bucket_limits_.end(),
                               proto.bucket_limit().begin(),
                               proto.bucket_limit().end());
  bucket_limits_ = custom_bucket_limits_;
  buckets_.clear();
  buckets_.insert(buckets_.end(), proto.bucket().begin(), proto.bucket().end());
  return true;
}

void Histogram::Clear() {
  min_ = bucket_limits_[bucket_limits_.size() - 1];
  max_ = -DBL_MAX;
  num_ = 0;
  sum_ = 0;
  sum_squares_ = 0;
  buckets_.resize(bucket_limits_.size());
  for (size_t i = 0; i < bucket_limits_.size(); i++) {
    buckets_[i] = 0;
  }
}

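// Record "value" in the histogram.  std::upper_bound returns the index of the
// first bucket limit that is strictly greater than "value", so the count lands
// in the half-open bucket [bucket_limits_[b - 1], bucket_limits_[b]); bucket 0
// has an implicit lower bound of -DBL_MAX.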
void Histogram::Add(double value) {
  int b =
      std::upper_bound(bucket_limits_.begin(), bucket_limits_.end(), value) -
      bucket_limits_.begin();

  buckets_[b] += 1.0;
  if (min_ > value) min_ = value;
  if (max_ < value) max_ = value;
  num_++;
  sum_ += value;
  sum_squares_ += (value * value);
}

double Histogram::Median() const { return Percentile(50.0); }

// Linearly map the variable x from [x0, x1] onto [y0, y1]
double Histogram::Remap(double x, double x0, double x1, double y0,
                        double y1) const {
  return y0 + (x - x0) / (x1 - x0) * (y1 - y0);
}

// Pick tight left-hand-side and right-hand-side bounds and then
// interpolate a histogram value at percentile p
double Histogram::Percentile(double p) const {
  if (num_ == 0.0) return 0.0;

  double threshold = num_ * (p / 100.0);
  double cumsum_prev = 0;
  for (size_t i = 0; i < buckets_.size(); i++) {
    double cumsum = cumsum_prev + buckets_[i];

    // Find the first bucket whose cumsum >= threshold
    if (cumsum >= threshold) {
      // Prevent divide by 0 in Remap, which happens if cumsum == cumsum_prev.
      // This should only get hit when p == 0, cumsum == 0, and cumsum_prev == 0.
      if (cumsum == cumsum_prev) {
        continue;
      }

      // Calculate the lower bound of interpolation
      double lhs = (i == 0 || cumsum_prev == 0) ? min_ : bucket_limits_[i - 1];
      lhs = std::max(lhs, min_);

      // Calculate the upper bound of interpolation
      double rhs = bucket_limits_[i];
      rhs = std::min(rhs, max_);

      double weight = Remap(threshold, cumsum_prev, cumsum, lhs, rhs);
      return weight;
    }

    cumsum_prev = cumsum;
  }
  return max_;
}

double Histogram::Average() const {
  if (num_ == 0.0) return 0;
  return sum_ / num_;
}

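// Standard deviation computed from the running sums via the identity
// Var(X) = E[X^2] - (E[X])^2, i.e. sum_squares_ / num_ - (sum_ / num_)^2
// (the population form, with no Bessel correction).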
double Histogram::StandardDeviation() const {
  if (num_ == 0.0) return 0;
  double variance = (sum_squares_ * num_ - sum_ * sum_) / (num_ * num_);
  return sqrt(variance);
}

std::string Histogram::ToString() const {
  std::string r;
  char buf[200];
  snprintf(buf, sizeof(buf), "Count: %.0f Average: %.4f StdDev: %.2f\n", num_,
           Average(), StandardDeviation());
  r.append(buf);
  snprintf(buf, sizeof(buf), "Min: %.4f Median: %.4f Max: %.4f\n",
           (num_ == 0.0 ? 0.0 : min_), Median(), max_);
  r.append(buf);
  r.append("------------------------------------------------------\n");
  const double mult = num_ > 0 ? 100.0 / num_ : 0.0;
  double sum = 0;
  for (size_t b = 0; b < buckets_.size(); b++) {
    if (buckets_[b] <= 0.0) continue;
    sum += buckets_[b];
    snprintf(buf, sizeof(buf), "[ %10.2g, %10.2g ) %7.0f %7.3f%% %7.3f%% ",
             ((b == 0) ? -DBL_MAX : bucket_limits_[b - 1]),  // left
             bucket_limits_[b],                              // right
             buckets_[b],                                    // count
             mult * buckets_[b],                             // percentage
             mult * sum);                                    // cum percentage
    r.append(buf);

    // Add hash marks based on percentage; 20 marks for 100%.
    int marks = static_cast<int>(20 * (buckets_[b] / num_) + 0.5);
    r.append(marks, '#');
    r.push_back('\n');
  }
  return r;
}

void Histogram::EncodeToProto(HistogramProto* proto,
                              bool preserve_zero_buckets) const {
  proto->Clear();
  proto->set_min(min_);
  proto->set_max(max_);
  proto->set_num(num_);
  proto->set_sum(sum_);
  proto->set_sum_squares(sum_squares_);
  for (size_t i = 0; i < buckets_.size();) {
    double end = bucket_limits_[i];
    double count = buckets_[i];
    i++;
    if (!preserve_zero_buckets && count <= 0.0) {
      // Find run of empty buckets and collapse them into one
      while (i < buckets_.size() && buckets_[i] <= 0.0) {
        end = bucket_limits_[i];
        count = buckets_[i];
        i++;
      }
    }
    proto->add_bucket_limit(end);
    proto->add_bucket(count);
  }
  if (proto->bucket_size() == 0) {
    // It's easier when we restore if we always have at least one bucket entry
    proto->add_bucket_limit(DBL_MAX);
    proto->add_bucket(0.0);
  }
}

// ThreadSafeHistogram implementation.
bool ThreadSafeHistogram::DecodeFromProto(const HistogramProto& proto) {
  mutex_lock l(mu_);
  return histogram_.DecodeFromProto(proto);
}

void ThreadSafeHistogram::Clear() {
  mutex_lock l(mu_);
  histogram_.Clear();
}

void ThreadSafeHistogram::Add(double value) {
  mutex_lock l(mu_);
  histogram_.Add(value);
}

void ThreadSafeHistogram::EncodeToProto(HistogramProto* proto,
                                        bool preserve_zero_buckets) const {
  mutex_lock l(mu_);
  histogram_.EncodeToProto(proto, preserve_zero_buckets);
}

double ThreadSafeHistogram::Median() const {
  mutex_lock l(mu_);
  return histogram_.Median();
}

double ThreadSafeHistogram::Percentile(double p) const {
  mutex_lock l(mu_);
  return histogram_.Percentile(p);
}

double ThreadSafeHistogram::Average() const {
  mutex_lock l(mu_);
  return histogram_.Average();
}

double ThreadSafeHistogram::StandardDeviation() const {
  mutex_lock l(mu_);
  return histogram_.StandardDeviation();
}

std::string ThreadSafeHistogram::ToString() const {
  mutex_lock l(mu_);
  return histogram_.ToString();
}

}  // namespace histogram
}  // namespace tensorflow
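// Illustrative usage sketch (not part of the library; shown only to document
// how the methods defined above fit together):
//
//   Histogram h;                        // default geometric bucket limits
//   for (double v : {1.0, 2.0, 4.0}) h.Add(v);
//   double p90 = h.Percentile(90.0);    // interpolated 90th percentile
//   HistogramProto proto;
//   h.EncodeToProto(&proto, /*preserve_zero_buckets=*/false);
//
// Use ThreadSafeHistogram instead when the same instance is written and read
// from multiple threads concurrently.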