Home | History | Annotate | Download | only in metrics
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Histogram is an object that aggregates statistics, and can summarize them in
      6 // various forms, including ASCII graphical, HTML, and numerically (as a
      7 // vector of numbers corresponding to each of the aggregating buckets).
      8 
      9 // It supports calls to accumulate either time intervals (which are processed
     10 // as integral number of milliseconds), or arbitrary integral units.
     11 
     12 // For Histogram(exponential histogram), LinearHistogram and CustomHistogram,
     13 // the minimum for a declared range is 1 (instead of 0), while the maximum is
     14 // (HistogramBase::kSampleType_MAX - 1). Currently you can declare histograms
     15 // with ranges exceeding those limits (e.g. 0 as minimal or
     16 // HistogramBase::kSampleType_MAX as maximal), but those excesses will be
     17 // silently clamped to those limits (for backwards compatibility with existing
     18 // code). Best practice is to not exceed the limits.
     19 
     20 // Each use of a histogram with the same name will reference the same underlying
     21 // data, so it is safe to record to the same histogram from multiple locations
     22 // in the code. It is a runtime error if all uses of the same histogram do not
     23 // agree exactly in type, bucket size and range.
     24 
     25 // For Histogram and LinearHistogram, the maximum for a declared range should
     26 // always be strictly larger than the minimum. Zero and
     27 // HistogramBase::kSampleType_MAX are implicitly added as first and last ranges,
     28 // so the smallest legal bucket_count is 3. However CustomHistogram can have
     29 // bucket count as 2 (when you give a custom ranges vector containing only 1
     30 // range).
     31 // For these 3 kinds of histograms, the max bucket count is always
     32 // (Histogram::kBucketCount_MAX - 1).
     33 
     34 // The buckets layout of class Histogram is exponential. For example, buckets
     35 // might contain (sequentially) the count of values in the following intervals:
     36 // [0,1), [1,2), [2,4), [4,8), [8,16), [16,32), [32,64), [64,infinity)
     37 // That bucket allocation would actually result from construction of a histogram
     38 // for values between 1 and 64, with 8 buckets, such as:
     39 // Histogram count("some name", 1, 64, 8);
     40 // Note that the underflow bucket [0,1) and the overflow bucket [64,infinity)
     41 // are also counted by the constructor in the user supplied "bucket_count"
     42 // argument.
     43 // The above example has an exponential ratio of 2 (doubling the bucket width
     44 // in each consecutive bucket).  The Histogram class automatically calculates
     45 // the smallest ratio that it can use to construct the number of buckets
     46 // selected in the constructor.  As another example, if you had 50 buckets,
     47 // and millisecond time values from 1 to 10000, then the ratio between
     48 // consecutive bucket widths will be approximately somewhere around the 50th
     49 // root of 10000.  This approach provides very fine grain (narrow) buckets
     50 // at the low end of the histogram scale, but allows the histogram to cover a
     51 // gigantic range with the addition of very few buckets.
     52 
     53 // Usually we use macros to define and use a histogram, which are defined in
     54 // base/metrics/histogram_macros.h. Note: Callers should include that header
     55 // directly if they only access the histogram APIs through macros.
     56 //
     57 // Macros use a pattern involving a function static variable, that is a pointer
     58 // to a histogram.  This static is explicitly initialized on any thread
     59 // that detects an uninitialized (NULL) pointer.  The potentially racy
     60 // initialization is not a problem as it is always set to point to the same
     61 // value (i.e., the FactoryGet always returns the same value).  FactoryGet
     62 // is also completely thread safe, which results in a completely thread safe,
     63 // and relatively fast, set of counters.  To avoid races at shutdown, the static
     64 // pointer is NOT deleted, and we leak the histograms at process termination.
     65 
     66 #ifndef BASE_METRICS_HISTOGRAM_H_
     67 #define BASE_METRICS_HISTOGRAM_H_
     68 
     69 #include <stddef.h>
     70 #include <stdint.h>
     71 
     72 #include <map>
     73 #include <memory>
     74 #include <string>
     75 #include <vector>
     76 
     77 #include "base/base_export.h"
     78 #include "base/compiler_specific.h"
     79 #include "base/gtest_prod_util.h"
     80 #include "base/logging.h"
     81 #include "base/macros.h"
     82 #include "base/metrics/bucket_ranges.h"
     83 #include "base/metrics/histogram_base.h"
     84 // TODO(asvitkine): Migrate callers to include this directly and remove this.
     85 #include "base/metrics/histogram_macros.h"
     86 #include "base/metrics/histogram_samples.h"
     87 #include "base/time/time.h"
     88 
     89 namespace base {
     90 
     91 class BooleanHistogram;  // Defined later in this header.
     92 class CustomHistogram;  // Defined later in this header.
     93 class Histogram;  // Defined later in this header.
     94 class LinearHistogram;  // Defined later in this header.
     95 class PersistentMemoryAllocator;  // Not referenced elsewhere in this header.
     96 class Pickle;  // Used below only through pointers.
     97 class PickleIterator;  // Used below only through pointers.
     98 class SampleVector;  // Used below via std::unique_ptr and const reference.
     99 
    100 class BASE_EXPORT Histogram : public HistogramBase {  // Exponential buckets; see file comment.
    101  public:
    102   // Initialize maximum number of buckets in histograms as 16,384.
    103   static const uint32_t kBucketCount_MAX;
    104 
    105   typedef std::vector<Count> Counts;
    106 
    107   ~Histogram() override;
    108 
    109   //----------------------------------------------------------------------------
    110   // For a valid histogram, input should follow these restrictions:
    111   // minimum > 0 (if a minimum below 1 is specified, it will implicitly be
    112   //              normalized up to 1)
    113   // maximum > minimum
    114   // buckets > 2 [minimum buckets needed: underflow, overflow and the range]
    115   // Additionally,
    116   // buckets <= (maximum - minimum + 2) - this is to ensure that we don't have
    117   // more buckets than the range of numbers; having more buckets than 1 per
    118   // value in the range would be nonsensical.
    119   static HistogramBase* FactoryGet(const std::string& name,
    120                                    Sample minimum,
    121                                    Sample maximum,
    122                                    uint32_t bucket_count,
    123                                    int32_t flags);
    124   static HistogramBase* FactoryTimeGet(const std::string& name,
    125                                        base::TimeDelta minimum,
    126                                        base::TimeDelta maximum,
    127                                        uint32_t bucket_count,
    128                                        int32_t flags);
    129 
    130   // Overloads of the above two functions that take a const char* |name| param,
    131   // to avoid code bloat from the std::string constructor being inlined into
    132   // call sites.
    133   static HistogramBase* FactoryGet(const char* name,
    134                                    Sample minimum,
    135                                    Sample maximum,
    136                                    uint32_t bucket_count,
    137                                    int32_t flags);
    138   static HistogramBase* FactoryTimeGet(const char* name,
    139                                        base::TimeDelta minimum,
    140                                        base::TimeDelta maximum,
    141                                        uint32_t bucket_count,
    142                                        int32_t flags);
    143 
    144   // Create a histogram using data in persistent storage.
    145   static std::unique_ptr<HistogramBase> PersistentCreate(
    146       const std::string& name,
    147       Sample minimum,
    148       Sample maximum,
    149       const BucketRanges* ranges,
    150       HistogramBase::AtomicCount* counts,
    151       HistogramBase::AtomicCount* logged_counts,
    152       uint32_t counts_size,
    153       HistogramSamples::Metadata* meta,
    154       HistogramSamples::Metadata* logged_meta);
    155 
    156   static void InitializeBucketRanges(Sample minimum,
    157                                      Sample maximum,
    158                                      BucketRanges* ranges);
    159 
    160   // This constant is for FindCorruption. Since snapshots of histograms are
    161   // taken asynchronously relative to sampling, and our counting code currently
    162   // does not prevent race conditions, it is pretty likely that we'll catch a
    163   // redundant count that doesn't match the sample count.  We allow for a
    164   // certain amount of slop before flagging this as an inconsistency. Even with
    165   // an inconsistency, we'll snapshot it again (for UMA in about a half hour),
    166   // so we'll eventually get the data, if it was not the result of a corruption.
    167   static const int kCommonRaceBasedCountMismatch;
    168 
    169   // Check to see if bucket ranges, counts and tallies in the snapshot are
    170   // consistent with the bucket ranges and checksums in our histogram.  This can
    171   // produce a false-alarm if a race occurred in the reading of the data during
    172   // a SnapShot process, but should otherwise be false at all times (unless we
    173   // have memory over-writes, or DRAM failures). Flag definitions are located
    174   // under "enum Inconsistency" in base/metrics/histogram_base.h.
    175   uint32_t FindCorruption(const HistogramSamples& samples) const override;
    176 
    177   //----------------------------------------------------------------------------
    178   // Accessors for factory construction, serialization and testing.
    179   //----------------------------------------------------------------------------
    180   Sample declared_min() const { return declared_min_; }
    181   Sample declared_max() const { return declared_max_; }
    182   virtual Sample ranges(uint32_t i) const;
    183   virtual uint32_t bucket_count() const;
    184   const BucketRanges* bucket_ranges() const { return bucket_ranges_; }
    185 
    186   // This function validates histogram construction arguments. It returns false
    187   // if some of the arguments are totally bad.
    188   // Note: currently it allows some bad input, e.g. 0 as minimum, but silently
    189   // converts it to good input: 1.
    190   // TODO(kaiwang): Be more strict and return false for any bad input, and
    191   // make this a readonly validating function.
    192   static bool InspectConstructionArguments(const std::string& name,
    193                                            Sample* minimum,
    194                                            Sample* maximum,
    195                                            uint32_t* bucket_count);
    196 
    197   // HistogramBase implementation:
    198   uint64_t name_hash() const override;
    199   HistogramType GetHistogramType() const override;
    200   bool HasConstructionArguments(Sample expected_minimum,
    201                                 Sample expected_maximum,
    202                                 uint32_t expected_bucket_count) const override;
    203   void Add(Sample value) override;
    204   void AddCount(Sample value, int count) override;
    205   std::unique_ptr<HistogramSamples> SnapshotSamples() const override;
    206   std::unique_ptr<HistogramSamples> SnapshotDelta() override;
    207   std::unique_ptr<HistogramSamples> SnapshotFinalDelta() const override;
    208   void AddSamples(const HistogramSamples& samples) override;
    209   bool AddSamplesFromPickle(base::PickleIterator* iter) override;
    210   void WriteHTMLGraph(std::string* output) const override;
    211   void WriteAscii(std::string* output) const override;
    212 
    213  protected:
    214   // This class, defined entirely within the .cc file, contains all the
    215   // common logic for building a Histogram and can be overridden by more
    216   // specific types to alter details of how the creation is done. It is
    217   // defined as an embedded class (rather than an anonymous one) so it
    218   // can access the protected constructors.
    219   class Factory;
    220 
    221   // |ranges| should contain the underflow and overflow buckets. See top
    222   // comments for example.
    223   Histogram(const std::string& name,
    224             Sample minimum,
    225             Sample maximum,
    226             const BucketRanges* ranges);
    227 
    228   // Traditionally, histograms allocate their own memory for the bucket
    229   // vector but "shared" histograms use memory regions allocated from a
    230   // special memory segment that is passed in here.  It is assumed that
    231   // the life of this memory is managed externally and exceeds the lifetime
    232   // of this object. Practically, this memory is never released until the
    233   // process exits and the OS cleans it up.
    234   Histogram(const std::string& name,
    235             Sample minimum,
    236             Sample maximum,
    237             const BucketRanges* ranges,
    238             HistogramBase::AtomicCount* counts,
    239             HistogramBase::AtomicCount* logged_counts,
    240             uint32_t counts_size,
    241             HistogramSamples::Metadata* meta,
    242             HistogramSamples::Metadata* logged_meta);
    243 
    244   // HistogramBase implementation:
    245   bool SerializeInfoImpl(base::Pickle* pickle) const override;
    246 
    247   // Method to override to skip the display of the i'th bucket if it's empty.
    248   virtual bool PrintEmptyBucket(uint32_t index) const;
    249 
    250   // Get normalized size, relative to the ranges(i).
    251   virtual double GetBucketSize(Count current, uint32_t i) const;
    252 
    253   // Return a string description of what goes in a given bucket.
    254   // Most commonly this is the numeric value, but in derived classes it may
    255   // be a name (or string description) given to the bucket.
    256   virtual const std::string GetAsciiBucketRange(uint32_t it) const;
    257 
    258  private:
    259   // Allow tests to corrupt our innards for testing purposes.
    260   FRIEND_TEST_ALL_PREFIXES(HistogramTest, BoundsTest);
    261   FRIEND_TEST_ALL_PREFIXES(HistogramTest, BucketPlacementTest);
    262   FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptSampleCounts);
    263 
    264   friend class StatisticsRecorder;  // To allow it to delete duplicates.
    265   friend class StatisticsRecorderTest;
    266 
    267   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
    268       base::PickleIterator* iter);
    269   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
    270 
    271   // Implementation of SnapshotSamples function.
    272   std::unique_ptr<SampleVector> SnapshotSampleVector() const;
    273 
    274   //----------------------------------------------------------------------------
    275   // Helpers for emitting Ascii graphic.  Each method appends data to output.
    276 
    277   void WriteAsciiImpl(bool graph_it,
    278                       const std::string& newline,
    279                       std::string* output) const;
    280 
    281   // Find out how large (graphically) the largest bucket will appear to be.
    282   double GetPeakBucketSize(const SampleVector& samples) const;
    283 
    284   // Write a common header message describing this histogram.
    285   void WriteAsciiHeader(const SampleVector& samples,
    286                         Count sample_count,
    287                         std::string* output) const;
    288 
    289   // Write information about previous, current, and next buckets.
    290   // Information such as cumulative percentage, etc.
    291   void WriteAsciiBucketContext(const int64_t past,
    292                                const Count current,
    293                                const int64_t remaining,
    294                                const uint32_t i,
    295                                std::string* output) const;
    296 
    297   // WriteJSON calls these.
    298   void GetParameters(DictionaryValue* params) const override;
    299 
    300   void GetCountAndBucketData(Count* count,
    301                              int64_t* sum,
    302                              ListValue* buckets) const override;
    303 
    304   // Does not own this object. Should get from StatisticsRecorder.
    305   const BucketRanges* bucket_ranges_;
    306 
    307   Sample declared_min_;  // Less than this goes into the first bucket.
    308   Sample declared_max_;  // Over this goes into the last bucket.
    309 
    310   // Finally, provide the state that changes with the addition of each new
    311   // sample.
    312   std::unique_ptr<SampleVector> samples_;
    313 
    314   // Also keep a previous uploaded state for calculating deltas.
    315   std::unique_ptr<HistogramSamples> logged_samples_;
    316 
    317   // Flag to indicate if a final delta has previously been created. It is
    318   // used to DCHECK that a final delta is not created multiple times.
    319   mutable bool final_delta_created_ = false;
    320 
    321   DISALLOW_COPY_AND_ASSIGN(Histogram);
    322 };
    323 
    324 //------------------------------------------------------------------------------
    325 
    326 // LinearHistogram is a more traditional histogram, with evenly spaced
    327 // buckets.
    328 class BASE_EXPORT LinearHistogram : public Histogram {
    329  public:
    330   ~LinearHistogram() override;
    331 
    332   /* minimum should start from 1. 0 as minimum is invalid; 0 is an implicit
    333      default underflow bucket. */
    334   static HistogramBase* FactoryGet(const std::string& name,
    335                                    Sample minimum,
    336                                    Sample maximum,
    337                                    uint32_t bucket_count,
    338                                    int32_t flags);
    339   static HistogramBase* FactoryTimeGet(const std::string& name,
    340                                        TimeDelta minimum,
    341                                        TimeDelta maximum,
    342                                        uint32_t bucket_count,
    343                                        int32_t flags);
    344 
    345   // Overloads of the above two functions that take a const char* |name| param,
    346   // to avoid code bloat from the std::string constructor being inlined into
    347   // call sites.
    348   static HistogramBase* FactoryGet(const char* name,
    349                                    Sample minimum,
    350                                    Sample maximum,
    351                                    uint32_t bucket_count,
    352                                    int32_t flags);
    353   static HistogramBase* FactoryTimeGet(const char* name,
    354                                        TimeDelta minimum,
    355                                        TimeDelta maximum,
    356                                        uint32_t bucket_count,
    357                                        int32_t flags);
    358 
    359   // Create a histogram using data in persistent storage.
    360   static std::unique_ptr<HistogramBase> PersistentCreate(
    361       const std::string& name,
    362       Sample minimum,
    363       Sample maximum,
    364       const BucketRanges* ranges,
    365       HistogramBase::AtomicCount* counts,
    366       HistogramBase::AtomicCount* logged_counts,
    367       uint32_t counts_size,
    368       HistogramSamples::Metadata* meta,
    369       HistogramSamples::Metadata* logged_meta);
    370 
    371   struct DescriptionPair {
    372     Sample sample;
    373     const char* description;  // Null means end of a list of pairs.
    374   };
    375 
    376   // Create a LinearHistogram and store a list of number/text values for use in
    377   // writing the histogram graph.
    378   // |descriptions| can be NULL, which means no special descriptions to set. If
    379   // it's not NULL, the last element in the array must have a NULL in its
    380   // "description" field.
    381   static HistogramBase* FactoryGetWithRangeDescription(
    382       const std::string& name,
    383       Sample minimum,
    384       Sample maximum,
    385       uint32_t bucket_count,
    386       int32_t flags,
    387       const DescriptionPair descriptions[]);
    388 
    389   static void InitializeBucketRanges(Sample minimum,
    390                                      Sample maximum,
    391                                      BucketRanges* ranges);
    392 
    393   // Overridden from Histogram:
    394   HistogramType GetHistogramType() const override;
    395 
    396  protected:
    397   class Factory;
    398 
    399   LinearHistogram(const std::string& name,
    400                   Sample minimum,
    401                   Sample maximum,
    402                   const BucketRanges* ranges);
    403 
    404   LinearHistogram(const std::string& name,
    405                   Sample minimum,
    406                   Sample maximum,
    407                   const BucketRanges* ranges,
    408                   HistogramBase::AtomicCount* counts,
    409                   HistogramBase::AtomicCount* logged_counts,
    410                   uint32_t counts_size,
    411                   HistogramSamples::Metadata* meta,
    412                   HistogramSamples::Metadata* logged_meta);
    413 
    414   double GetBucketSize(Count current, uint32_t i) const override;
    415 
    416   // If we have a description for a bucket, then return that.  Otherwise
    417   // let parent class provide a (numeric) description.
    418   const std::string GetAsciiBucketRange(uint32_t i) const override;
    419 
    420   // Skip printing of name for numeric range if we have a name (and if this is
    421   // an empty bucket).
    422   bool PrintEmptyBucket(uint32_t index) const override;
    423 
    424  private:
    425   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
    426       base::PickleIterator* iter);
    427   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
    428 
    429   // For some ranges, we store a printable description of a bucket range.
    430   // If there is no description, then GetAsciiBucketRange() uses parent class
    431   // to provide a description.
    432   typedef std::map<Sample, std::string> BucketDescriptionMap;
    433   BucketDescriptionMap bucket_description_;
    434 
    435   DISALLOW_COPY_AND_ASSIGN(LinearHistogram);
    436 };
    437 
    438 //------------------------------------------------------------------------------
    439 
    440 // BooleanHistogram is a LinearHistogram for booleans (FactoryGet takes no range).
    441 class BASE_EXPORT BooleanHistogram : public LinearHistogram {
    442  public:
    443   static HistogramBase* FactoryGet(const std::string& name, int32_t flags);
    444 
    445   // Overload of the above function that takes a const char* |name| param,
    446   // to avoid code bloat from the std::string constructor being inlined into
    447   // call sites.
    448   static HistogramBase* FactoryGet(const char* name, int32_t flags);
    449 
    450   // Create a histogram using data in persistent storage.
    451   static std::unique_ptr<HistogramBase> PersistentCreate(
    452       const std::string& name,
    453       const BucketRanges* ranges,
    454       HistogramBase::AtomicCount* counts,
    455       HistogramBase::AtomicCount* logged_counts,
    456       HistogramSamples::Metadata* meta,
    457       HistogramSamples::Metadata* logged_meta);
    458 
    459   HistogramType GetHistogramType() const override;
    460 
    461  protected:
    462   class Factory;
    463 
    464  private:
    465   BooleanHistogram(const std::string& name, const BucketRanges* ranges);
    466   BooleanHistogram(const std::string& name,
    467                    const BucketRanges* ranges,
    468                    HistogramBase::AtomicCount* counts,
    469                    HistogramBase::AtomicCount* logged_counts,
    470                    HistogramSamples::Metadata* meta,
    471                    HistogramSamples::Metadata* logged_meta);
    472 
    473   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
    474       base::PickleIterator* iter);
    475   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
    476 
    477   DISALLOW_COPY_AND_ASSIGN(BooleanHistogram);
    478 };
    479 
    480 //------------------------------------------------------------------------------
    481 
    482 // CustomHistogram is a histogram whose ranges are a caller-supplied set of integers.
    483 class BASE_EXPORT CustomHistogram : public Histogram {
    484  public:
    485   // |custom_ranges| contains a vector of limits on ranges. Each limit should be
    486   // > 0 and < kSampleType_MAX. (Currently 0 is still accepted for backward
    487   // compatibility). The limits can be unordered or contain duplication, but
    488   // clients should not depend on this.
    489   static HistogramBase* FactoryGet(const std::string& name,
    490                                    const std::vector<Sample>& custom_ranges,
    491                                    int32_t flags);
    492 
    493   // Overload of the above function that takes a const char* |name| param,
    494   // to avoid code bloat from the std::string constructor being inlined into
    495   // call sites.
    496   static HistogramBase* FactoryGet(const char* name,
    497                                    const std::vector<Sample>& custom_ranges,
    498                                    int32_t flags);
    499 
    500   // Create a histogram using data in persistent storage.
    501   static std::unique_ptr<HistogramBase> PersistentCreate(
    502       const std::string& name,
    503       const BucketRanges* ranges,
    504       HistogramBase::AtomicCount* counts,
    505       HistogramBase::AtomicCount* logged_counts,
    506       uint32_t counts_size,
    507       HistogramSamples::Metadata* meta,
    508       HistogramSamples::Metadata* logged_meta);
    509 
    510   // Overridden from Histogram:
    511   HistogramType GetHistogramType() const override;
    512 
    513   // Helper method for transforming an array of valid enumeration values
    514   // to the std::vector<int> expected by UMA_HISTOGRAM_CUSTOM_ENUMERATION.
    515   // This function ensures that a guard bucket exists right after any
    516   // valid sample value (unless the next higher sample is also a valid value),
    517   // so that invalid samples never fall into the same bucket as valid samples.
    518   // TODO(kaiwang): Change name to ArrayToCustomEnumRanges.
    519   static std::vector<Sample> ArrayToCustomRanges(const Sample* values,
    520                                                  uint32_t num_values);
    521  protected:
    522   class Factory;
    523 
    524   CustomHistogram(const std::string& name,
    525                   const BucketRanges* ranges);
    526 
    527   CustomHistogram(const std::string& name,
    528                   const BucketRanges* ranges,
    529                   HistogramBase::AtomicCount* counts,
    530                   HistogramBase::AtomicCount* logged_counts,
    531                   uint32_t counts_size,
    532                   HistogramSamples::Metadata* meta,
    533                   HistogramSamples::Metadata* logged_meta);
    534 
    535   // HistogramBase implementation:
    536   bool SerializeInfoImpl(base::Pickle* pickle) const override;
    537 
    538   double GetBucketSize(Count current, uint32_t i) const override;
    539 
    540  private:
    541   friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
    542       base::PickleIterator* iter);
    543   static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
    544 
    545   static bool ValidateCustomRanges(const std::vector<Sample>& custom_ranges);
    546 
    547   DISALLOW_COPY_AND_ASSIGN(CustomHistogram);
    548 };
    549 
    550 }  // namespace base
    551 
    552 #endif  // BASE_METRICS_HISTOGRAM_H_
    553