Home | History | Annotate | Download | only in metrics
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Histogram is an object that aggregates statistics, and can summarize them in
      6 // various forms, including ASCII graphical, HTML, and numerically (as a
      7 // vector of numbers corresponding to each of the aggregating buckets).
      8 
      9 // It supports calls to accumulate either time intervals (which are processed
     10 // as an integral number of milliseconds), or arbitrary integral units.
     11 
     12 // For Histogram (exponential histogram), LinearHistogram and CustomHistogram,
     13 // the minimum for a declared range is 1 (instead of 0), while the maximum is
     14 // (HistogramBase::kSampleType_MAX - 1). However, there will always be underflow
     15 // and overflow buckets added automatically, so a 0 bucket will always exist
     16 // even when a minimum value of 1 is specified.
     17 
     18 // Each use of a histogram with the same name will reference the same underlying
     19 // data, so it is safe to record to the same histogram from multiple locations
     20 // in the code. It is a runtime error if all uses of the same histogram do not
     21 // agree exactly in type, bucket size and range.
     22 
     23 // For Histogram and LinearHistogram, the maximum for a declared range should
     24 // always be strictly larger than (not equal to) the minimum. Zero and
     25 // HistogramBase::kSampleType_MAX are implicitly added as first and last ranges,
     26 // so the smallest legal bucket_count is 3. However, CustomHistogram can have
     27 // a bucket count of 2 (when you give a custom ranges vector containing only 1
     28 // range).
     29 // For these 3 kinds of histograms, the max bucket count is always
     30 // (Histogram::kBucketCount_MAX - 1).
     31 
     32 // The buckets layout of class Histogram is exponential. For example, buckets
     33 // might contain (sequentially) the count of values in the following intervals:
     34 // [0,1), [1,2), [2,4), [4,8), [8,16), [16,32), [32,64), [64,infinity)
     35 // That bucket allocation would actually result from construction of a histogram
     36 // for values between 1 and 64, with 8 buckets, such as:
     37 // Histogram count("some name", 1, 64, 8);
     38 // Note that the underflow bucket [0,1) and the overflow bucket [64,infinity)
     39 // are also counted by the constructor in the user supplied "bucket_count"
     40 // argument.
     41 // The above example has an exponential ratio of 2 (doubling the bucket width
     42 // in each consecutive bucket).  The Histogram class automatically calculates
     43 // the smallest ratio that it can use to construct the number of buckets
     44 // selected in the constructor.  As another example, if you had 50 buckets,
     45 // and millisecond time values from 1 to 10000, then the ratio between
     46 // consecutive bucket widths will be approximately somewhere around the 50th
     47 // root of 10000.  This approach provides very fine grain (narrow) buckets
     48 // at the low end of the histogram scale, but allows the histogram to cover a
     49 // gigantic range with the addition of very few buckets.
     50 
     51 // Usually we use macros to define and use a histogram, which are defined in
     52 // base/metrics/histogram_macros.h. Note: Callers should include that header
     53 // directly if they only access the histogram APIs through macros.
     54 //
     55 // Macros use a pattern involving a function static variable, that is a pointer
     56 // to a histogram.  This static is explicitly initialized on any thread
     57 // that detects an uninitialized (NULL) pointer.  The potentially racy
     58 // initialization is not a problem as it is always set to point to the same
     59 // value (i.e., the FactoryGet always returns the same value).  FactoryGet
     60 // is also completely thread safe, which results in a completely thread safe,
     61 // and relatively fast, set of counters.  To avoid races at shutdown, the static
     62 // pointer is NOT deleted, and we leak the histograms at process termination.
     63 
     64 #ifndef BASE_METRICS_HISTOGRAM_H_
     65 #define BASE_METRICS_HISTOGRAM_H_
     66 
     67 #include <stddef.h>
     68 #include <stdint.h>
     69 
     70 #include <map>
     71 #include <memory>
     72 #include <string>
     73 #include <vector>
     74 
     75 #include "base/base_export.h"
     76 #include "base/compiler_specific.h"
     77 #include "base/gtest_prod_util.h"
     78 #include "base/logging.h"
     79 #include "base/macros.h"
     80 #include "base/metrics/bucket_ranges.h"
     81 #include "base/metrics/histogram_base.h"
     82 #include "base/metrics/histogram_samples.h"
     83 #include "base/time/time.h"
     84 
     85 namespace base {
     86 
     87 class BooleanHistogram;  // Defined later in this header.
     88 class CustomHistogram;   // Defined later in this header.
     89 class Histogram;         // Defined later in this header.
     90 class LinearHistogram;   // Defined later in this header.
     91 class Pickle;            // Only used as a pointer parameter in this header.
     92 class PickleIterator;    // Only used as a pointer parameter in this header.
     93 class SampleVector;      // Used via std::unique_ptr / const reference here.
     94 
     95 class BASE_EXPORT Histogram : public HistogramBase {
     96  public:
     97   // Maximum number of buckets in a histogram (16,384). Only declared here; the
          // value itself is supplied outside this header.
     98   static const uint32_t kBucketCount_MAX;
     99 
    100   typedef std::vector<Count> Counts;
    101 
    102   ~Histogram() override;
    103 
    104   //----------------------------------------------------------------------------
    105   // For a valid histogram, input should follow these restrictions:
    106   // minimum > 0 (if a minimum below 1 is specified, it will implicitly be
    107   //              normalized up to 1)
    108   // maximum > minimum
    109   // buckets > 2 [minimum buckets needed: underflow, overflow and the range]
    110   // Additionally,
    111   // buckets <= (maximum - minimum + 2) - this is to ensure that we don't have
    112   // more buckets than the range of numbers; having more buckets than 1 per
    113   // value in the range would be nonsensical.
          // Per the file header comment, every use of the same name refers to the
          // same underlying data, and all uses must agree in type, bucket size and
          // range.
    114   static HistogramBase* FactoryGet(const std::string& name,
    115                                    Sample minimum,
    116                                    Sample maximum,
    117                                    uint32_t bucket_count,
    118                                    int32_t flags);
    119   static HistogramBase* FactoryTimeGet(const std::string& name,
    120                                        base::TimeDelta minimum,
    121                                        base::TimeDelta maximum,
    122                                        uint32_t bucket_count,
    123                                        int32_t flags);
    124 
    125   // Overloads of the above two functions that take a const char* |name| param,
    126   // to avoid code bloat from the std::string constructor being inlined into
    127   // call sites.
    128   static HistogramBase* FactoryGet(const char* name,
    129                                    Sample minimum,
    130                                    Sample maximum,
    131                                    uint32_t bucket_count,
    132                                    int32_t flags);
    133   static HistogramBase* FactoryTimeGet(const char* name,
    134                                        base::TimeDelta minimum,
    135                                        base::TimeDelta maximum,
    136                                        uint32_t bucket_count,
    137                                        int32_t flags);
    138 
    139   // Create a histogram using data in persistent storage.
          // NOTE(review): the parameter list matches the second protected
          // constructor below, whose comment says the bucket memory is managed
          // externally; presumably the same applies here -- confirm in the .cc file.
    140   static std::unique_ptr<HistogramBase> PersistentCreate(
    141       const std::string& name,
    142       Sample minimum,
    143       Sample maximum,
    144       const BucketRanges* ranges,
    145       HistogramBase::AtomicCount* counts,
    146       HistogramBase::AtomicCount* logged_counts,
    147       uint32_t counts_size,
    148       HistogramSamples::Metadata* meta,
    149       HistogramSamples::Metadata* logged_meta);
    150 
          // NOTE(review): presumably fills |ranges| with the exponential bucket
          // layout described in the file header for the range [minimum, maximum] --
          // confirm in the .cc file.
    151   static void InitializeBucketRanges(Sample minimum,
    152                                      Sample maximum,
    153                                      BucketRanges* ranges);
    154 
    155   // This constant is for FindCorruption. Since snapshots of histograms are
    156   // taken asynchronously relative to sampling, and our counting code currently
    157   // does not prevent race conditions, it is pretty likely that we'll catch a
    158   // redundant count that doesn't match the sample count.  We allow for a
    159   // certain amount of slop before flagging this as an inconsistency. Even with
    160   // an inconsistency, we'll snapshot it again (for UMA in about a half hour),
    161   // so we'll eventually get the data, if it was not the result of a corruption.
    162   static const int kCommonRaceBasedCountMismatch;
    163 
    164   // Check to see if bucket ranges, counts and tallies in the snapshot are
    165   // consistent with the bucket ranges and checksums in our histogram.  This can
    166   // produce a false-alarm if a race occurred in the reading of the data during
    167   // a SnapShot process, but should otherwise be false at all times (unless we
    168   // have memory over-writes, or DRAM failures). Flag definitions are located
    169   // under "enum Inconsistency" in base/metrics/histogram_base.h.
    170   uint32_t FindCorruption(const HistogramSamples& samples) const override;
    171 
    172   //----------------------------------------------------------------------------
    173   // Accessors for factory construction, serialization and testing.
    174   //----------------------------------------------------------------------------
    175   Sample declared_min() const { return declared_min_; }
    176   Sample declared_max() const { return declared_max_; }
    177   virtual Sample ranges(uint32_t i) const;
    178   virtual uint32_t bucket_count() const;
    179   const BucketRanges* bucket_ranges() const { return bucket_ranges_; }
    180 
    181   // This function validates histogram construction arguments. It returns false
    182   // if some of the arguments are totally bad.
    183   // Note: Currently it allows some bad input, e.g. 0 as minimum, but silently
    184   // converts it to good input: 1.
    185   // TODO(kaiwang): Be more restrictive and return false for any bad input, and
    186   // make this a readonly validating function.
    187   static bool InspectConstructionArguments(const std::string& name,
    188                                            Sample* minimum,
    189                                            Sample* maximum,
    190                                            uint32_t* bucket_count);
    191 
    192   // HistogramBase implementation:
    193   uint64_t name_hash() const override;
    194   HistogramType GetHistogramType() const override;
    195   bool HasConstructionArguments(Sample expected_minimum,
    196                                 Sample expected_maximum,
    197                                 uint32_t expected_bucket_count) const override;
    198   void Add(Sample value) override;
    199   void AddCount(Sample value, int count) override;
    200   std::unique_ptr<HistogramSamples> SnapshotSamples() const override;
    201   std::unique_ptr<HistogramSamples> SnapshotDelta() override;
    202   std::unique_ptr<HistogramSamples> SnapshotFinalDelta() const override;
    203   void AddSamples(const HistogramSamples& samples) override;
    204   bool AddSamplesFromPickle(base::PickleIterator* iter) override;
    205   void WriteHTMLGraph(std::string* output) const override;
    206   void WriteAscii(std::string* output) const override;
    207 
    208  protected:
    209   // This class, defined entirely within the .cc file, contains all the
    210   // common logic for building a Histogram and can be overridden by more
    211   // specific types to alter details of how the creation is done. It is
    212   // defined as an embedded class (rather than an anonymous one) so it
    213   // can access the protected constructors.
    214   class Factory;
    215 
    216   // |ranges| should contain the underflow and overflow buckets. See top
    217   // comments for example.
    218   Histogram(const std::string& name,
    219             Sample minimum,
    220             Sample maximum,
    221             const BucketRanges* ranges);
    222 
    223   // Traditionally, histograms allocate their own memory for the bucket
    224   // vector but "shared" histograms use memory regions allocated from a
    225   // special memory segment that is passed in here.  It is assumed that
    226   // the life of this memory is managed externally and exceeds the lifetime
    227   // of this object. Practically, this memory is never released until the
    228   // process exits and the OS cleans it up.
    229   Histogram(const std::string& name,
    230             Sample minimum,
    231             Sample maximum,
    232             const BucketRanges* ranges,
    233             HistogramBase::AtomicCount* counts,
    234             HistogramBase::AtomicCount* logged_counts,
    235             uint32_t counts_size,
    236             HistogramSamples::Metadata* meta,
    237             HistogramSamples::Metadata* logged_meta);
    238 
    239   // HistogramBase implementation:
    240   bool SerializeInfoImpl(base::Pickle* pickle) const override;
    241 
    242   // Method to override to skip the display of the i'th bucket if it's empty.
    243   virtual bool PrintEmptyBucket(uint32_t index) const;
    244 
    245   // Get normalized size, relative to the ranges(i).
    246   virtual double GetBucketSize(Count current, uint32_t i) const;
    247 
    248   // Return a string description of what goes in a given bucket.
    249   // Most commonly this is the numeric value, but in derived classes it may
    250   // be a name (or string description) given to the bucket.
          // NOTE(review): |it| is presumably the bucket index (the LinearHistogram
          // override names the same parameter |i|) -- confirm.
    251   virtual const std::string GetAsciiBucketRange(uint32_t it) const;
    252 
    253  private:
    254   // Allow tests to corrupt our innards for testing purposes.
    255   FRIEND_TEST_ALL_PREFIXES(HistogramTest, BoundsTest);
    256   FRIEND_TEST_ALL_PREFIXES(HistogramTest, BucketPlacementTest);
    257   FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptSampleCounts);
    258 
    259   friend class StatisticsRecorder;  // To allow it to delete duplicates.
    260   friend class StatisticsRecorderTest;
    261 
    262   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
    263       base::PickleIterator* iter);
    264   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
    265 
    266   // Implementation of SnapshotSamples function.
    267   std::unique_ptr<SampleVector> SnapshotSampleVector() const;
    268 
    269   //----------------------------------------------------------------------------
    270   // Helpers for emitting Ascii graphic.  Each method appends data to output.
    271 
    272   void WriteAsciiImpl(bool graph_it,
    273                       const std::string& newline,
    274                       std::string* output) const;
    275 
    276   // Find out how large (graphically) the largest bucket will appear to be.
    277   double GetPeakBucketSize(const SampleVector& samples) const;
    278 
    279   // Write a common header message describing this histogram.
    280   void WriteAsciiHeader(const SampleVector& samples,
    281                         Count sample_count,
    282                         std::string* output) const;
    283 
    284   // Write information about previous, current, and next buckets.
    285   // Information such as cumulative percentage, etc.
    286   void WriteAsciiBucketContext(const int64_t past,
    287                                const Count current,
    288                                const int64_t remaining,
    289                                const uint32_t i,
    290                                std::string* output) const;
    291 
    292   // WriteJSON calls these.
    293   void GetParameters(DictionaryValue* params) const override;
    294 
    295   void GetCountAndBucketData(Count* count,
    296                              int64_t* sum,
    297                              ListValue* buckets) const override;
    298 
    299   // Does not own this object. Should get from StatisticsRecorder.
    300   const BucketRanges* bucket_ranges_;
    301 
    302   Sample declared_min_;  // Less than this goes into the first bucket.
    303   Sample declared_max_;  // Over this goes into the last bucket.
    304 
    305   // Finally, provide the state that changes with the addition of each new
    306   // sample.
    307   std::unique_ptr<SampleVector> samples_;
    308 
    309   // Also keep a previous uploaded state for calculating deltas.
    310   std::unique_ptr<HistogramSamples> logged_samples_;
    311 
    312   // Flag to indicate if PrepareFinalDelta has been previously called. It is
    313   // used to DCHECK that a final delta is not created multiple times.
          // NOTE(review): no PrepareFinalDelta is declared in this class; this
          // presumably refers to SnapshotFinalDelta(), which is const (hence the
          // |mutable| qualifier here) -- confirm against the .cc file.
    314   mutable bool final_delta_created_ = false;
    315 
    316   DISALLOW_COPY_AND_ASSIGN(Histogram);
    317 };
    318 
    319 //------------------------------------------------------------------------------
    320 
    321 // LinearHistogram is a more traditional histogram, with evenly spaced
    322 // buckets.
    323 class BASE_EXPORT LinearHistogram : public Histogram {
    324  public:
    325   ~LinearHistogram() override;
    326 
    327   /* minimum should start from 1. 0 as minimum is invalid. 0 is an implicit
    328      default underflow bucket. */
    329   static HistogramBase* FactoryGet(const std::string& name,
    330                                    Sample minimum,
    331                                    Sample maximum,
    332                                    uint32_t bucket_count,
    333                                    int32_t flags);
    334   static HistogramBase* FactoryTimeGet(const std::string& name,
    335                                        TimeDelta minimum,
    336                                        TimeDelta maximum,
    337                                        uint32_t bucket_count,
    338                                        int32_t flags);
    339 
    340   // Overloads of the above two functions that take a const char* |name| param,
    341   // to avoid code bloat from the std::string constructor being inlined into
    342   // call sites.
    343   static HistogramBase* FactoryGet(const char* name,
    344                                    Sample minimum,
    345                                    Sample maximum,
    346                                    uint32_t bucket_count,
    347                                    int32_t flags);
    348   static HistogramBase* FactoryTimeGet(const char* name,
    349                                        TimeDelta minimum,
    350                                        TimeDelta maximum,
    351                                        uint32_t bucket_count,
    352                                        int32_t flags);
    353 
    354   // Create a histogram using data in persistent storage.
          // The parameter list is the same as the second protected constructor
          // below.
    355   static std::unique_ptr<HistogramBase> PersistentCreate(
    356       const std::string& name,
    357       Sample minimum,
    358       Sample maximum,
    359       const BucketRanges* ranges,
    360       HistogramBase::AtomicCount* counts,
    361       HistogramBase::AtomicCount* logged_counts,
    362       uint32_t counts_size,
    363       HistogramSamples::Metadata* meta,
    364       HistogramSamples::Metadata* logged_meta);
    365 
          // One (sample value, text description) entry for
          // FactoryGetWithRangeDescription() below.
    366   struct DescriptionPair {
    367     Sample sample;
    368     const char* description;  // Null means end of a list of pairs.
    369   };
    370 
    371   // Create a LinearHistogram and store a list of number/text values for use in
    372   // writing the histogram graph.
    373   // |descriptions| can be NULL, which means no special descriptions to set. If
    374   // it's not NULL, the last element in the array must have a NULL in its
    375   // "description" field.
    376   static HistogramBase* FactoryGetWithRangeDescription(
    377       const std::string& name,
    378       Sample minimum,
    379       Sample maximum,
    380       uint32_t bucket_count,
    381       int32_t flags,
    382       const DescriptionPair descriptions[]);
    383 
          // NOTE(review): presumably fills |ranges| with the evenly spaced layout
          // mentioned in the class comment for the range [minimum, maximum] --
          // confirm in the .cc file.
    384   static void InitializeBucketRanges(Sample minimum,
    385                                      Sample maximum,
    386                                      BucketRanges* ranges);
    387 
    388   // Overridden from Histogram:
    389   HistogramType GetHistogramType() const override;
    390 
    391  protected:
    392   class Factory;
    393 
    394   LinearHistogram(const std::string& name,
    395                   Sample minimum,
    396                   Sample maximum,
    397                   const BucketRanges* ranges);
    398 
    399   LinearHistogram(const std::string& name,
    400                   Sample minimum,
    401                   Sample maximum,
    402                   const BucketRanges* ranges,
    403                   HistogramBase::AtomicCount* counts,
    404                   HistogramBase::AtomicCount* logged_counts,
    405                   uint32_t counts_size,
    406                   HistogramSamples::Metadata* meta,
    407                   HistogramSamples::Metadata* logged_meta);
    408 
    409   double GetBucketSize(Count current, uint32_t i) const override;
    410 
    411   // If we have a description for a bucket, then return that.  Otherwise
    412   // let parent class provide a (numeric) description.
    413   const std::string GetAsciiBucketRange(uint32_t i) const override;
    414 
    415   // Skip printing of name for numeric range if we have a name (and if this is
    416   // an empty bucket).
    417   bool PrintEmptyBucket(uint32_t index) const override;
    418 
    419  private:
    420   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
    421       base::PickleIterator* iter);
    422   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
    423 
    424   // For some ranges, we store a printable description of a bucket range.
    425   // If there is no description, then GetAsciiBucketRange() uses parent class
    426   // to provide a description.
    427   typedef std::map<Sample, std::string> BucketDescriptionMap;
    428   BucketDescriptionMap bucket_description_;
    429 
    430   DISALLOW_COPY_AND_ASSIGN(LinearHistogram);
    431 };
    432 
    433 //------------------------------------------------------------------------------
    434 
    435 // BooleanHistogram is a histogram for booleans.
    436 class BASE_EXPORT BooleanHistogram : public LinearHistogram {
    437  public:
          // Unlike the parent classes' FactoryGet, only a name and flags are taken;
          // no minimum, maximum or bucket count is supplied by the caller.
    438   static HistogramBase* FactoryGet(const std::string& name, int32_t flags);
    439 
    440   // Overload of the above function that takes a const char* |name| param,
    441   // to avoid code bloat from the std::string constructor being inlined into
    442   // call sites.
    443   static HistogramBase* FactoryGet(const char* name, int32_t flags);
    444 
    445   // Create a histogram using data in persistent storage.
          // NOTE(review): unlike LinearHistogram::PersistentCreate, no minimum,
          // maximum or counts_size is passed; presumably these are fixed for a
          // boolean histogram -- confirm in the .cc file.
    446   static std::unique_ptr<HistogramBase> PersistentCreate(
    447       const std::string& name,
    448       const BucketRanges* ranges,
    449       HistogramBase::AtomicCount* counts,
    450       HistogramBase::AtomicCount* logged_counts,
    451       HistogramSamples::Metadata* meta,
    452       HistogramSamples::Metadata* logged_meta);
    453 
          // Overridden from Histogram:
    454   HistogramType GetHistogramType() const override;
    455 
    456  protected:
    457   class Factory;
    458 
    459  private:
          // Constructors are private here (they are protected in Histogram and
          // LinearHistogram).
    460   BooleanHistogram(const std::string& name, const BucketRanges* ranges);
    461   BooleanHistogram(const std::string& name,
    462                    const BucketRanges* ranges,
    463                    HistogramBase::AtomicCount* counts,
    464                    HistogramBase::AtomicCount* logged_counts,
    465                    HistogramSamples::Metadata* meta,
    466                    HistogramSamples::Metadata* logged_meta);
    467 
    468   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
    469       base::PickleIterator* iter);
    470   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
    471 
    472   DISALLOW_COPY_AND_ASSIGN(BooleanHistogram);
    473 };
    474 
    475 //------------------------------------------------------------------------------
    476 
    477 // CustomHistogram is a histogram for a set of custom integers.
    478 class BASE_EXPORT CustomHistogram : public Histogram {
    479  public:
    480   // |custom_ranges| contains a vector of limits on ranges. Each limit should be
    481   // > 0 and < kSampleType_MAX. (Currently 0 is still accepted for backward
    482   // compatibility). The limits can be unordered or contain duplicates, but
    483   // client should not depend on this.
    484   static HistogramBase* FactoryGet(const std::string& name,
    485                                    const std::vector<Sample>& custom_ranges,
    486                                    int32_t flags);
    487 
    488   // Overload of the above function that takes a const char* |name| param,
    489   // to avoid code bloat from the std::string constructor being inlined into
    490   // call sites.
    491   static HistogramBase* FactoryGet(const char* name,
    492                                    const std::vector<Sample>& custom_ranges,
    493                                    int32_t flags);
    494 
    495   // Create a histogram using data in persistent storage.
          // The parameter list is the same as the second protected constructor
          // below (no minimum/maximum is passed, unlike
          // Histogram::PersistentCreate).
    496   static std::unique_ptr<HistogramBase> PersistentCreate(
    497       const std::string& name,
    498       const BucketRanges* ranges,
    499       HistogramBase::AtomicCount* counts,
    500       HistogramBase::AtomicCount* logged_counts,
    501       uint32_t counts_size,
    502       HistogramSamples::Metadata* meta,
    503       HistogramSamples::Metadata* logged_meta);
    504 
    505   // Overridden from Histogram:
    506   HistogramType GetHistogramType() const override;
    507 
    508   // Helper method for transforming an array of valid enumeration values
    509   // to the std::vector<int> expected by UMA_HISTOGRAM_CUSTOM_ENUMERATION.
    510   // This function ensures that a guard bucket exists right after any
    511   // valid sample value (unless the next higher sample is also a valid value),
    512   // so that invalid samples never fall into the same bucket as valid samples.
    513   // TODO(kaiwang): Change name to ArrayToCustomEnumRanges.
    514   static std::vector<Sample> ArrayToCustomRanges(const Sample* values,
    515                                                  uint32_t num_values);
    516  protected:
    517   class Factory;
    518 
    519   CustomHistogram(const std::string& name,
    520                   const BucketRanges* ranges);
    521 
    522   CustomHistogram(const std::string& name,
    523                   const BucketRanges* ranges,
    524                   HistogramBase::AtomicCount* counts,
    525                   HistogramBase::AtomicCount* logged_counts,
    526                   uint32_t counts_size,
    527                   HistogramSamples::Metadata* meta,
    528                   HistogramSamples::Metadata* logged_meta);
    529 
    530   // HistogramBase implementation:
    531   bool SerializeInfoImpl(base::Pickle* pickle) const override;
    532 
    533   double GetBucketSize(Count current, uint32_t i) const override;
    534 
    535  private:
    536   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
    537       base::PickleIterator* iter);
    538   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
    539 
          // NOTE(review): presumably checks |custom_ranges| against the limits
          // described for FactoryGet() above -- confirm in the .cc file.
    540   static bool ValidateCustomRanges(const std::vector<Sample>& custom_ranges);
    541 
    542   DISALLOW_COPY_AND_ASSIGN(CustomHistogram);
    543 };
    544 
    545 }  // namespace base
    546 
    547 #endif  // BASE_METRICS_HISTOGRAM_H_
    548