Home | History | Annotate | Download | only in histogram
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
#include "tensorflow/core/lib/histogram/histogram.h"

#include <float.h>
#include <math.h>

#include <algorithm>
#include <vector>

#include "tensorflow/core/framework/summary.pb.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/types.h"
     25 namespace tensorflow {
     26 namespace histogram {
     27 
     28 static std::vector<double>* InitDefaultBucketsInner() {
     29   std::vector<double> buckets;
     30   std::vector<double> neg_buckets;
     31   // Make buckets whose range grows by 10% starting at 1.0e-12 up to 1.0e20
     32   double v = 1.0e-12;
     33   while (v < 1.0e20) {
     34     buckets.push_back(v);
     35     neg_buckets.push_back(-v);
     36     v *= 1.1;
     37   }
     38   buckets.push_back(DBL_MAX);
     39   neg_buckets.push_back(-DBL_MAX);
     40   std::reverse(neg_buckets.begin(), neg_buckets.end());
     41   std::vector<double>* result = new std::vector<double>;
     42   result->insert(result->end(), neg_buckets.begin(), neg_buckets.end());
     43   result->push_back(0.0);
     44   result->insert(result->end(), buckets.begin(), buckets.end());
     45   return result;
     46 }
     47 
     48 static gtl::ArraySlice<double> InitDefaultBuckets() {
     49   static std::vector<double>* default_bucket_limits = InitDefaultBucketsInner();
     50   return *default_bucket_limits;
     51 }
     52 
     53 Histogram::Histogram() : bucket_limits_(InitDefaultBuckets()) { Clear(); }
     54 
// Create a histogram with a custom set of bucket limits,
// specified in "custom_buckets[0..custom_buckets.size()-1]".
// The limits are copied into custom_bucket_limits_, so the caller's storage
// need not outlive this object; bucket_limits_ then views that member copy.
Histogram::Histogram(gtl::ArraySlice<double> custom_bucket_limits)
    : custom_bucket_limits_(custom_bucket_limits.begin(),
                            custom_bucket_limits.end()),
      bucket_limits_(custom_bucket_limits_) {
#ifndef NDEBUG
  // Debug-only validation: there must be at least one limit, ...
  DCHECK_GT(bucket_limits_.size(), size_t{0});
  // ... and the bucket boundaries must be strictly increasing.
  for (size_t i = 1; i < bucket_limits_.size(); i++) {
    DCHECK_GT(bucket_limits_[i], bucket_limits_[i - 1]);
  }
#endif
  Clear();
}
     70 
// Restores this histogram's full state from "proto".
// Returns false for a malformed proto: the bucket counts and bucket limits
// must be non-empty and have matching lengths.  On success the proto's
// bucket limits replace any previous limits as this histogram's custom
// limits.  Note: on the early-failure path no fields have been touched, but
// validation here does not re-check that the limits are increasing.
bool Histogram::DecodeFromProto(const HistogramProto& proto) {
  if ((proto.bucket_size() != proto.bucket_limit_size()) ||
      (proto.bucket_size() == 0)) {
    return false;
  }
  min_ = proto.min();
  max_ = proto.max();
  num_ = proto.num();
  sum_ = proto.sum();
  sum_squares_ = proto.sum_squares();
  // Adopt the proto's limits as our own custom limits; bucket_limits_ must
  // be re-pointed at the member copy, not at the (temporary) proto data.
  custom_bucket_limits_.clear();
  custom_bucket_limits_.insert(custom_bucket_limits_.end(),
                               proto.bucket_limit().begin(),
                               proto.bucket_limit().end());
  bucket_limits_ = custom_bucket_limits_;
  buckets_.clear();
  buckets_.insert(buckets_.end(), proto.bucket().begin(), proto.bucket().end());
  return true;
}
     90 
     91 void Histogram::Clear() {
     92   min_ = bucket_limits_[bucket_limits_.size() - 1];
     93   max_ = -DBL_MAX;
     94   num_ = 0;
     95   sum_ = 0;
     96   sum_squares_ = 0;
     97   buckets_.resize(bucket_limits_.size());
     98   for (size_t i = 0; i < bucket_limits_.size(); i++) {
     99     buckets_[i] = 0;
    100   }
    101 }
    102 
    103 void Histogram::Add(double value) {
    104   int b =
    105       std::upper_bound(bucket_limits_.begin(), bucket_limits_.end(), value) -
    106       bucket_limits_.begin();
    107 
    108   buckets_[b] += 1.0;
    109   if (min_ > value) min_ = value;
    110   if (max_ < value) max_ = value;
    111   num_++;
    112   sum_ += value;
    113   sum_squares_ += (value * value);
    114 }
    115 
    116 double Histogram::Median() const { return Percentile(50.0); }
    117 
    118 // Linearly map the variable x from [x0, x1] unto [y0, y1]
    119 double Histogram::Remap(double x, double x0, double x1, double y0,
    120                         double y1) const {
    121   return y0 + (x - x0) / (x1 - x0) * (y1 - y0);
    122 }
    123 
// Pick tight left-hand-side and right-hand-side bounds and then
// interpolate a histogram value at percentile p.
// Returns 0 for an empty histogram; clamps the interpolation range to the
// observed [min_, max_] so the answer never falls outside the data.
double Histogram::Percentile(double p) const {
  if (num_ == 0.0) return 0.0;

  // The percentile expressed as a (possibly fractional) sample count.
  double threshold = num_ * (p / 100.0);
  double cumsum_prev = 0;
  for (size_t i = 0; i < buckets_.size(); i++) {
    double cumsum = cumsum_prev + buckets_[i];

    // Find the first bucket whose cumsum >= threshold
    if (cumsum >= threshold) {
      // Prevent divide by 0 in remap which happens if cumsum == cumsum_prev
      // This should only get hit when p == 0, cumsum == 0, and cumsum_prev == 0
      if (cumsum == cumsum_prev) {
        continue;
      }

      // Calculate the lower bound of interpolation
      double lhs = (i == 0 || cumsum_prev == 0) ? min_ : bucket_limits_[i - 1];
      lhs = std::max(lhs, min_);

      // Calculate the upper bound of interpolation
      double rhs = bucket_limits_[i];
      rhs = std::min(rhs, max_);

      // Linearly interpolate within [lhs, rhs] according to where threshold
      // falls between this bucket's previous and current cumulative counts.
      double weight = Remap(threshold, cumsum_prev, cumsum, lhs, rhs);
      return weight;
    }

    cumsum_prev = cumsum;
  }
  // threshold exceeded every cumulative count (p at/near 100).
  return max_;
}
    158 
    159 double Histogram::Average() const {
    160   if (num_ == 0.0) return 0;
    161   return sum_ / num_;
    162 }
    163 
    164 double Histogram::StandardDeviation() const {
    165   if (num_ == 0.0) return 0;
    166   double variance = (sum_squares_ * num_ - sum_ * sum_) / (num_ * num_);
    167   return sqrt(variance);
    168 }
    169 
// Renders a human-readable summary: overall statistics, then one line per
// non-empty bucket showing its half-open range, count, percentage,
// cumulative percentage, and a '#' bar chart where 20 marks represent 100%.
std::string Histogram::ToString() const {
  std::string r;
  char buf[200];
  snprintf(buf, sizeof(buf), "Count: %.0f  Average: %.4f  StdDev: %.2f\n", num_,
           Average(), StandardDeviation());
  r.append(buf);
  // Report min as 0 for an empty histogram (min_ is a sentinel until the
  // first Add()).
  snprintf(buf, sizeof(buf), "Min: %.4f  Median: %.4f  Max: %.4f\n",
           (num_ == 0.0 ? 0.0 : min_), Median(), max_);
  r.append(buf);
  r.append("------------------------------------------------------\n");
  // Guard against division by zero when no samples were added.
  const double mult = num_ > 0 ? 100.0 / num_ : 0.0;
  double sum = 0;
  for (size_t b = 0; b < buckets_.size(); b++) {
    if (buckets_[b] <= 0.0) continue;  // skip empty buckets entirely
    sum += buckets_[b];
    snprintf(buf, sizeof(buf), "[ %10.2g, %10.2g ) %7.0f %7.3f%% %7.3f%% ",
             ((b == 0) ? -DBL_MAX : bucket_limits_[b - 1]),  // left
             bucket_limits_[b],                              // right
             buckets_[b],                                    // count
             mult * buckets_[b],                             // percentage
             mult * sum);                                    // cum percentage
    r.append(buf);

    // Add hash marks based on percentage; 20 marks for 100%.
    int marks = static_cast<int>(20 * (buckets_[b] / num_) + 0.5);
    r.append(marks, '#');
    r.push_back('\n');
  }
  return r;
}
    200 
    201 void Histogram::EncodeToProto(HistogramProto* proto,
    202                               bool preserve_zero_buckets) const {
    203   proto->Clear();
    204   proto->set_min(min_);
    205   proto->set_max(max_);
    206   proto->set_num(num_);
    207   proto->set_sum(sum_);
    208   proto->set_sum_squares(sum_squares_);
    209   for (size_t i = 0; i < buckets_.size();) {
    210     double end = bucket_limits_[i];
    211     double count = buckets_[i];
    212     i++;
    213     if (!preserve_zero_buckets && count <= 0.0) {
    214       // Find run of empty buckets and collapse them into one
    215       while (i < buckets_.size() && buckets_[i] <= 0.0) {
    216         end = bucket_limits_[i];
    217         count = buckets_[i];
    218         i++;
    219       }
    220     }
    221     proto->add_bucket_limit(end);
    222     proto->add_bucket(count);
    223   }
    224   if (proto->bucket_size() == 0.0) {
    225     // It's easier when we restore if we always have at least one bucket entry
    226     proto->add_bucket_limit(DBL_MAX);
    227     proto->add_bucket(0.0);
    228   }
    229 }
    230 
    231 // ThreadSafeHistogram implementation.
    232 bool ThreadSafeHistogram::DecodeFromProto(const HistogramProto& proto) {
    233   mutex_lock l(mu_);
    234   return histogram_.DecodeFromProto(proto);
    235 }
    236 
    237 void ThreadSafeHistogram::Clear() {
    238   mutex_lock l(mu_);
    239   histogram_.Clear();
    240 }
    241 
    242 void ThreadSafeHistogram::Add(double value) {
    243   mutex_lock l(mu_);
    244   histogram_.Add(value);
    245 }
    246 
    247 void ThreadSafeHistogram::EncodeToProto(HistogramProto* proto,
    248                                         bool preserve_zero_buckets) const {
    249   mutex_lock l(mu_);
    250   histogram_.EncodeToProto(proto, preserve_zero_buckets);
    251 }
    252 
    253 double ThreadSafeHistogram::Median() const {
    254   mutex_lock l(mu_);
    255   return histogram_.Median();
    256 }
    257 
    258 double ThreadSafeHistogram::Percentile(double p) const {
    259   mutex_lock l(mu_);
    260   return histogram_.Percentile(p);
    261 }
    262 
    263 double ThreadSafeHistogram::Average() const {
    264   mutex_lock l(mu_);
    265   return histogram_.Average();
    266 }
    267 
    268 double ThreadSafeHistogram::StandardDeviation() const {
    269   mutex_lock l(mu_);
    270   return histogram_.StandardDeviation();
    271 }
    272 
    273 std::string ThreadSafeHistogram::ToString() const {
    274   mutex_lock l(mu_);
    275   return histogram_.ToString();
    276 }
    277 
    278 }  // namespace histogram
    279 }  // namespace tensorflow
    280