Home | History | Annotate | Download | only in metrics
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Histogram is an object that aggregates statistics, and can summarize them in
      6 // various forms, including ASCII graphical, HTML, and numerically (as a
      7 // vector of numbers corresponding to each of the aggregating buckets).
      8 
      9 // It supports calls to accumulate either time intervals (which are processed
     10 // as integral number of milliseconds), or arbitrary integral units.
     11 
     12 // For Histogram(exponential histogram), LinearHistogram and CustomHistogram,
     13 // the minimum for a declared range is 1 (instead of 0), while the maximum is
     14 // (HistogramBase::kSampleType_MAX - 1). Currently you can declare histograms
     15 // with ranges exceeding those limits (e.g. 0 as minimal or
     16 // HistogramBase::kSampleType_MAX as maximal), but those excesses will be
     17 // silently clamped to those limits (for backwards compatibility with existing
     18 // code). Best practice is to not exceed the limits.
     19 
     20 // Each use of a histogram with the same name will reference the same underlying
     21 // data, so it is safe to record to the same histogram from multiple locations
     22 // in the code. It is a runtime error if all uses of the same histogram do not
     23 // agree exactly in type, bucket size and range.
     24 
     25 // For Histogram and LinearHistogram, the maximum for a declared range should
     26 // always be strictly larger than the minimum of that range. Zero and
     27 // HistogramBase::kSampleType_MAX are implicitly added as first and last ranges,
     28 // so the smallest legal bucket_count is 3. However CustomHistogram can have
     29 // bucket count as 2 (when you give a custom ranges vector containing only 1
     30 // range).
     31 // For these 3 kinds of histograms, the max bucket count is always
     32 // (Histogram::kBucketCount_MAX - 1).
     33 
     34 // The buckets layout of class Histogram is exponential. For example, buckets
     35 // might contain (sequentially) the count of values in the following intervals:
     36 // [0,1), [1,2), [2,4), [4,8), [8,16), [16,32), [32,64), [64,infinity)
     37 // That bucket allocation would actually result from construction of a histogram
     38 // for values between 1 and 64, with 8 buckets, such as:
     39 // Histogram count("some name", 1, 64, 8);
     40 // Note that the underflow bucket [0,1) and the overflow bucket [64,infinity)
     41 // are also counted by the constructor in the user supplied "bucket_count"
     42 // argument.
     43 // The above example has an exponential ratio of 2 (doubling the bucket width
     44 // in each consecutive bucket).  The Histogram class automatically calculates
     45 // the smallest ratio that it can use to construct the number of buckets
     46 // selected in the constructor.  As another example, if you had 50 buckets,
     47 // and millisecond time values from 1 to 10000, then the ratio between
     48 // consecutive bucket widths will be approximately somewhere around the 50th
     49 // root of 10000.  This approach provides very fine grain (narrow) buckets
     50 // at the low end of the histogram scale, but allows the histogram to cover a
     51 // gigantic range with the addition of very few buckets.
     52 
     53 // Usually we use macros to define and use a histogram, which are defined in
     54 // base/metrics/histogram_macros.h. Note: Callers should include that header
     55 // directly if they only access the histogram APIs through macros.
     56 //
     57 // Macros use a pattern involving a function static variable, that is a pointer
     58 // to a histogram.  This static is explicitly initialized on any thread
     59 // that detects an uninitialized (NULL) pointer.  The potentially racy
     60 // initialization is not a problem as it is always set to point to the same
     61 // value (i.e., the FactoryGet always returns the same value).  FactoryGet
     62 // is also completely thread safe, which results in a completely thread safe,
     63 // and relatively fast, set of counters.  To avoid races at shutdown, the static
     64 // pointer is NOT deleted, and we leak the histograms at process termination.
     65 
     66 #ifndef BASE_METRICS_HISTOGRAM_H_
     67 #define BASE_METRICS_HISTOGRAM_H_
     68 
     69 #include <stddef.h>
     70 #include <stdint.h>
     71 
     72 #include <map>
     73 #include <string>
     74 #include <vector>
     75 
     76 #include "base/base_export.h"
     77 #include "base/compiler_specific.h"
     78 #include "base/gtest_prod_util.h"
     79 #include "base/logging.h"
     80 #include "base/macros.h"
     81 #include "base/memory/scoped_ptr.h"
     82 #include "base/metrics/bucket_ranges.h"
     83 #include "base/metrics/histogram_base.h"
     84 // TODO(asvitkine): Migrate callers to include this directly and remove this.
     85 #include "base/metrics/histogram_macros.h"
     86 #include "base/metrics/histogram_samples.h"
     87 #include "base/time/time.h"
     88 
     89 namespace base {
     90 
     91 class BooleanHistogram;
     92 class CustomHistogram;
     93 class Histogram;
     94 class LinearHistogram;
     95 class Pickle;
     96 class PickleIterator;
     97 class SampleVector;
     98 
     99 class BASE_EXPORT Histogram : public HistogramBase {
    100  public:
    101   // Maximum number of buckets a histogram may have (initialized to 16,384).
    102   static const size_t kBucketCount_MAX;
    103 
    104   typedef std::vector<Count> Counts;
    105 
    106   //----------------------------------------------------------------------------
    107   // For a valid histogram, input should follow these restrictions:
    108   // minimum > 0 (if a minimum below 1 is specified, it will implicitly be
    109   //              normalized up to 1)
    110   // maximum > minimum
    111   // buckets > 2 [minimum buckets needed: underflow, overflow and the range]
    112   // Additionally,
    113   // buckets <= (maximum - minimum + 2) - this is to ensure that we don't have
    114   // more buckets than the range of numbers; having more buckets than 1 per
    115   // value in the range would be nonsensical.
    116   static HistogramBase* FactoryGet(const std::string& name,
    117                                    Sample minimum,
    118                                    Sample maximum,
    119                                    size_t bucket_count,
    120                                    int32_t flags);
    121   static HistogramBase* FactoryTimeGet(const std::string& name,
    122                                        base::TimeDelta minimum,
    123                                        base::TimeDelta maximum,
    124                                        size_t bucket_count,
    125                                        int32_t flags);
    126 
    127   // Overloads of the above two functions that take a const char* |name| param,
    128   // to avoid code bloat from the std::string constructor being inlined into
    129   // call sites.
    130   static HistogramBase* FactoryGet(const char* name,
    131                                    Sample minimum,
    132                                    Sample maximum,
    133                                    size_t bucket_count,
    134                                    int32_t flags);
    135   static HistogramBase* FactoryTimeGet(const char* name,
    136                                        base::TimeDelta minimum,
    137                                        base::TimeDelta maximum,
    138                                        size_t bucket_count,
    139                                        int32_t flags);
    140 
          // NOTE(review): presumably fills |ranges| with the exponential bucket
          // layout described at the top of this file for the span
          // [minimum, maximum] -- confirm against the implementation.
    141   static void InitializeBucketRanges(Sample minimum,
    142                                      Sample maximum,
    143                                      BucketRanges* ranges);
    144 
    145   // This constant is for FindCorruption. Since snapshots of histograms are
    146   // taken asynchronously relative to sampling, and our counting code currently
    147   // does not prevent race conditions, it is pretty likely that we'll catch a
    148   // redundant count that doesn't match the sample count.  We allow for a
    149   // certain amount of slop before flagging this as an inconsistency. Even with
    150   // an inconsistency, we'll snapshot it again (for UMA in about a half hour),
    151   // so we'll eventually get the data, if it was not the result of a corruption.
    152   static const int kCommonRaceBasedCountMismatch;
    153 
    154   // Check to see if bucket ranges, counts and tallies in the snapshot are
    155   // consistent with the bucket ranges and checksums in our histogram.  This can
    156   // produce a false-alarm if a race occurred in the reading of the data during
    157   // a SnapShot process, but should otherwise be false at all times (unless we
    158   // have memory over-writes, or DRAM failures).
    159   int FindCorruption(const HistogramSamples& samples) const override;
    160 
    161   //----------------------------------------------------------------------------
    162   // Accessors for factory construction, serialization and testing.
    163   //----------------------------------------------------------------------------
    164   Sample declared_min() const { return declared_min_; }
    165   Sample declared_max() const { return declared_max_; }
    166   virtual Sample ranges(size_t i) const;
    167   virtual size_t bucket_count() const;
    168   const BucketRanges* bucket_ranges() const { return bucket_ranges_; }
    169 
    170   // This function validates histogram construction arguments. It returns false
    171   // if some of the arguments are totally bad.
    172   // Note: Currently it allows some bad input, e.g. 0 as minimum, but silently
    173   // converts it to good input: 1.
    174   // TODO(kaiwang): Be stricter and return false for any bad input, and
    175   // make this a readonly validating function.
    176   static bool InspectConstructionArguments(const std::string& name,
    177                                            Sample* minimum,
    178                                            Sample* maximum,
    179                                            size_t* bucket_count);
    180 
    181   // HistogramBase implementation:
    182   uint64_t name_hash() const override;
    183   HistogramType GetHistogramType() const override;
    184   bool HasConstructionArguments(Sample expected_minimum,
    185                                 Sample expected_maximum,
    186                                 size_t expected_bucket_count) const override;
    187   void Add(Sample value) override;
    188   void AddCount(Sample value, int count) override;
    189   scoped_ptr<HistogramSamples> SnapshotSamples() const override;
    190   void AddSamples(const HistogramSamples& samples) override;
    191   bool AddSamplesFromPickle(base::PickleIterator* iter) override;
    192   void WriteHTMLGraph(std::string* output) const override;
    193   void WriteAscii(std::string* output) const override;
    194 
    195  protected:
    196   // |ranges| should contain the underflow and overflow buckets. See top
    197   // comments for example.
    198   Histogram(const std::string& name,
    199             Sample minimum,
    200             Sample maximum,
    201             const BucketRanges* ranges);
    202 
    203   ~Histogram() override;
    204 
    205   // HistogramBase implementation (declared protected here):
    206   bool SerializeInfoImpl(base::Pickle* pickle) const override;
    207 
    208   // Method to override to skip the display of the i'th bucket if it's empty.
    209   virtual bool PrintEmptyBucket(size_t index) const;
    210 
    211   // Get normalized size, relative to the ranges(i).
    212   virtual double GetBucketSize(Count current, size_t i) const;
    213 
    214   // Return a string description of what goes in a given bucket.
    215   // Most commonly this is the numeric value, but in derived classes it may
    216   // be a name (or string description) given to the bucket.
    217   virtual const std::string GetAsciiBucketRange(size_t it) const;
    218 
    219  private:
    220   // Allow tests to corrupt our innards for testing purposes.
    221   FRIEND_TEST_ALL_PREFIXES(HistogramTest, BoundsTest);
    222   FRIEND_TEST_ALL_PREFIXES(HistogramTest, BucketPlacementTest);
    223   FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptBucketBounds);
    224   FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptSampleCounts);
    225   FRIEND_TEST_ALL_PREFIXES(HistogramTest, NameMatchTest);
    226   FRIEND_TEST_ALL_PREFIXES(HistogramTest, AddCountTest);
    227 
    228   friend class StatisticsRecorder;  // To allow it to delete duplicates.
    229   friend class StatisticsRecorderTest;
    230 
          // The free function DeserializeHistogramInfo() is a friend, which gives it
          // access to the private static DeserializeInfoImpl() below.
          // NOTE(review): presumably it dispatches to this helper for this
          // histogram type -- confirm against the implementation.
    231   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
    232       base::PickleIterator* iter);
    233   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
    234 
    235   // Implementation of SnapshotSamples function.
    236   scoped_ptr<SampleVector> SnapshotSampleVector() const;
    237 
    238   //----------------------------------------------------------------------------
    239   // Helpers for emitting Ascii graphic.  Each method appends data to output.
    240 
    241   void WriteAsciiImpl(bool graph_it,
    242                       const std::string& newline,
    243                       std::string* output) const;
    244 
    245   // Find out how large (graphically) the largest bucket will appear to be.
    246   double GetPeakBucketSize(const SampleVector& samples) const;
    247 
    248   // Write a common header message describing this histogram.
    249   void WriteAsciiHeader(const SampleVector& samples,
    250                         Count sample_count,
    251                         std::string* output) const;
    252 
    253   // Write information about previous, current, and next buckets.
    254   // Information such as cumulative percentage, etc.
    255   void WriteAsciiBucketContext(const int64_t past,
    256                                const Count current,
    257                                const int64_t remaining,
    258                                const size_t i,
    259                                std::string* output) const;
    260 
    261   // WriteJSON calls these.
    262   void GetParameters(DictionaryValue* params) const override;
    263 
    264   void GetCountAndBucketData(Count* count,
    265                              int64_t* sum,
    266                              ListValue* buckets) const override;
    267 
    268   // Does not own this object. Should get from StatisticsRecorder.
    269   const BucketRanges* bucket_ranges_;
    270 
    271   Sample declared_min_;  // Less than this goes into the first bucket.
    272   Sample declared_max_;  // Over this goes into the last bucket.
    273 
    274   // Finally, provide the state that changes with the addition of each new
    275   // sample.
    276   scoped_ptr<SampleVector> samples_;
    277 
    278   DISALLOW_COPY_AND_ASSIGN(Histogram);
    279 };
    280 
    281 //------------------------------------------------------------------------------
    282 
    283 // LinearHistogram is a more traditional histogram, with evenly spaced
    284 // buckets.
        // It derives from Histogram and overrides GetBucketSize(),
        // GetAsciiBucketRange() and PrintEmptyBucket(); it can also hold a textual
        // description per bucket (see FactoryGetWithRangeDescription()).
    285 class BASE_EXPORT LinearHistogram : public Histogram {
    286  public:
    287   ~LinearHistogram() override;
    288 
    289   /* minimum should start from 1. 0 as minimum is invalid, because 0 is an
    290      implicit default underflow bucket. */
    291   static HistogramBase* FactoryGet(const std::string& name,
    292                                    Sample minimum,
    293                                    Sample maximum,
    294                                    size_t bucket_count,
    295                                    int32_t flags);
    296   static HistogramBase* FactoryTimeGet(const std::string& name,
    297                                        TimeDelta minimum,
    298                                        TimeDelta maximum,
    299                                        size_t bucket_count,
    300                                        int32_t flags);
    301 
    302   // Overloads of the above two functions that take a const char* |name| param,
    303   // to avoid code bloat from the std::string constructor being inlined into
    304   // call sites.
    305   static HistogramBase* FactoryGet(const char* name,
    306                                    Sample minimum,
    307                                    Sample maximum,
    308                                    size_t bucket_count,
    309                                    int32_t flags);
    310   static HistogramBase* FactoryTimeGet(const char* name,
    311                                        TimeDelta minimum,
    312                                        TimeDelta maximum,
    313                                        size_t bucket_count,
    314                                        int32_t flags);
    315 
          // Associates a sample value with the text used to describe its bucket.
    316   struct DescriptionPair {
    317     Sample sample;
    318     const char* description;  // Null means end of a list of pairs.
    319   };
    320 
    321   // Create a LinearHistogram and store a list of number/text values for use in
    322   // writing the histogram graph.
    323   // |descriptions| can be NULL, which means no special descriptions to set. If
    324   // it's not NULL, the last element in the array must have a NULL in its
    325   // "description" field.
    326   static HistogramBase* FactoryGetWithRangeDescription(
    327       const std::string& name,
    328       Sample minimum,
    329       Sample maximum,
    330       size_t bucket_count,
    331       int32_t flags,
    332       const DescriptionPair descriptions[]);
    333 
          // NOTE(review): presumably fills |ranges| with evenly spaced buckets for
          // the span [minimum, maximum] -- confirm against the implementation.
    334   static void InitializeBucketRanges(Sample minimum,
    335                                      Sample maximum,
    336                                      BucketRanges* ranges);
    337 
    338   // Overridden from Histogram:
    339   HistogramType GetHistogramType() const override;
    340 
    341  protected:
    342   LinearHistogram(const std::string& name,
    343                   Sample minimum,
    344                   Sample maximum,
    345                   const BucketRanges* ranges);
    346 
    347   double GetBucketSize(Count current, size_t i) const override;
    348 
    349   // If we have a description for a bucket, then return that.  Otherwise
    350   // let parent class provide a (numeric) description.
    351   const std::string GetAsciiBucketRange(size_t i) const override;
    352 
    353   // Skip printing of name for numeric range if we have a name (and if this is
    354   // an empty bucket).
    355   bool PrintEmptyBucket(size_t index) const override;
    356 
    357  private:
    358   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
    359       base::PickleIterator* iter);
    360   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
    361 
    362   // For some ranges, we store a printable description of a bucket range.
    363   // If there is no description, then GetAsciiBucketRange() uses parent class
    364   // to provide a description.
    365   typedef std::map<Sample, std::string> BucketDescriptionMap;
    366   BucketDescriptionMap bucket_description_;
    367 
    368   DISALLOW_COPY_AND_ASSIGN(LinearHistogram);
    369 };
    370 
    371 //------------------------------------------------------------------------------
    372 
    373 // BooleanHistogram is a histogram for booleans.
        // It derives from LinearHistogram. Its factory functions take only a name and
        // flags: callers supply no minimum, maximum or bucket count.
    374 class BASE_EXPORT BooleanHistogram : public LinearHistogram {
    375  public:
    376   static HistogramBase* FactoryGet(const std::string& name, int32_t flags);
    377 
    378   // Overload of the above function that takes a const char* |name| param,
    379   // to avoid code bloat from the std::string constructor being inlined into
    380   // call sites.
    381   static HistogramBase* FactoryGet(const char* name, int32_t flags);
    382 
    383   HistogramType GetHistogramType() const override;
    384 
    385  private:
          // Private constructor: takes only the name and the bucket ranges.
    386   BooleanHistogram(const std::string& name, const BucketRanges* ranges);
    387 
    388   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
    389       base::PickleIterator* iter);
    390   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
    391 
    392   DISALLOW_COPY_AND_ASSIGN(BooleanHistogram);
    393 };
    394 
    395 //------------------------------------------------------------------------------
    396 
    397 // CustomHistogram is a histogram for a set of custom integers.
    398 class BASE_EXPORT CustomHistogram : public Histogram {
    399  public:
    400   // |custom_ranges| contains a vector of limits on ranges. Each limit should be
    401   // > 0 and < kSampleType_MAX. (Currently 0 is still accepted for backward
    402   // compatibility). The limits can be unordered or contain duplicates, but
    403   // clients should not depend on this.
    404   static HistogramBase* FactoryGet(const std::string& name,
    405                                    const std::vector<Sample>& custom_ranges,
    406                                    int32_t flags);
    407 
    408   // Overload of the above function that takes a const char* |name| param,
    409   // to avoid code bloat from the std::string constructor being inlined into
    410   // call sites.
    411   static HistogramBase* FactoryGet(const char* name,
    412                                    const std::vector<Sample>& custom_ranges,
    413                                    int32_t flags);
    414 
    415   // Overridden from Histogram:
    416   HistogramType GetHistogramType() const override;
    417 
    418   // Helper method for transforming an array of valid enumeration values
    419   // to the std::vector<int> expected by UMA_HISTOGRAM_CUSTOM_ENUMERATION.
    420   // This function ensures that a guard bucket exists right after any
    421   // valid sample value (unless the next higher sample is also a valid value),
    422   // so that invalid samples never fall into the same bucket as valid samples.
    423   // TODO(kaiwang): Change name to ArrayToCustomEnumRanges.
    424   static std::vector<Sample> ArrayToCustomRanges(const Sample* values,
    425                                                  size_t num_values);
    426  protected:
          // Unlike Histogram's constructor, this one takes no explicit minimum or
          // maximum; only the name and the bucket ranges are supplied.
    427   CustomHistogram(const std::string& name,
    428                   const BucketRanges* ranges);
    429 
    430   // HistogramBase implementation:
    431   bool SerializeInfoImpl(base::Pickle* pickle) const override;
    432 
    433   double GetBucketSize(Count current, size_t i) const override;
    434 
    435  private:
    436   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
    437       base::PickleIterator* iter);
    438   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
    439 
          // NOTE(review): these look like helpers for checking |custom_ranges| and
          // turning them into a BucketRanges object; the exact validation rules and
          // the ownership of the returned pointer are not visible here -- confirm
          // against the implementation.
    440   static bool ValidateCustomRanges(const std::vector<Sample>& custom_ranges);
    441   static BucketRanges* CreateBucketRangesFromCustomRanges(
    442       const std::vector<Sample>& custom_ranges);
    443 
    444   DISALLOW_COPY_AND_ASSIGN(CustomHistogram);
    445 };
    446 
    447 }  // namespace base
    448 
    449 #endif  // BASE_METRICS_HISTOGRAM_H_
    450