Home | History | Annotate | Download | only in src
      1 // Copyright (c) 2008, Google Inc.
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 //     * Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 //     * Redistributions in binary form must reproduce the above
     11 // copyright notice, this list of conditions and the following disclaimer
     12 // in the documentation and/or other materials provided with the
     13 // distribution.
     14 //     * Neither the name of Google Inc. nor the names of its
     15 // contributors may be used to endorse or promote products derived from
     16 // this software without specific prior written permission.
     17 //
     18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 // ---
     31 // All Rights Reserved.
     32 //
     33 // Author: Daniel Ford
     34 
     35 #ifndef TCMALLOC_SAMPLER_H_
     36 #define TCMALLOC_SAMPLER_H_
     37 
     38 #include "config.h"
     39 #include <stddef.h>                     // for size_t
     40 #ifdef HAVE_STDINT_H
     41 #include <stdint.h>                     // for uint64_t, uint32_t, int32_t
     42 #endif
     43 #include <string.h>                     // for memcpy
     44 #include "base/basictypes.h"  // for ASSERT
     45 #include "internal_logging.h"  // for ASSERT
     46 
     47 namespace tcmalloc {
     48 
     49 //-------------------------------------------------------------------
     50 // Sampler to decide when to create a sample trace for an allocation
     51 // Not thread safe: Each thread should have its own sampler object.
     52 // Caller must use external synchronization if used
     53 // from multiple threads.
     54 //
     55 // With 512K average sample step (the default):
     56 //  the probability of sampling a 4K allocation is about 0.00778
     57 //  the probability of sampling a 1MB allocation is about 0.865
     58 //  the probability of sampling a 1GB allocation is about 1.00000
     59 // In general, the probability of sampling an allocation of size X
     60 // given a flag value of Y (default 1M) is:
     61 //  1 - e^(-X/Y)
     62 //
     63 // With 128K average sample step:
     64 //  the probability of sampling a 1MB allocation is about 0.99966
     65 //  the probability of sampling a 1GB allocation is about 1.0
     66 //  (about 1 - 2**(-26))
     67 // With 1M average sample step:
     68 //  the probability of sampling a 4K allocation is about 0.00390
     69 //  the probability of sampling a 1MB allocation is about 0.632
     70 //  the probability of sampling a 1GB allocation is about 1.0
     71 //
     72 // The sampler works by representing memory as a long stream from
     73 // which allocations are taken. Some of the bytes in this stream are
     74 // marked and if an allocation includes a marked byte then it is
     75 // sampled. Bytes are marked according to a Poisson point process
     76 // with each byte being marked independently with probability
     77 // p = 1/tcmalloc_sample_parameter.  This makes the probability
     78 // of sampling an allocation of X bytes equal to the CDF of
     79 // a geometric with mean tcmalloc_sample_parameter. (ie. the
     80 // probability that at least one byte in the range is marked). This
     81 // is accurately given by the CDF of the corresponding exponential
     82 // distribution : 1 - e^(-X/tcmalloc_sample_parameter)
     83 // Independence of the byte marking ensures independence of
     84 // the sampling of each allocation.
     85 //
     86 // This scheme is implemented by noting that, starting from any
     87 // fixed place, the number of bytes until the next marked byte
     88 // is geometrically distributed. This number is recorded as
     89 // bytes_until_sample_.  Every allocation subtracts from this
     90 // number until it is less than 0. When this happens the current
     91 // allocation is sampled.
     92 //
     93 // When an allocation is sampled, bytes_until_sample_ is reset to
     94 // a new independently sampled geometric number of bytes. The
     95 // memoryless property of the point process means that this may
     96 // be taken as the number of bytes after the end of the current
     97 // allocation until the next marked byte. This ensures that
     98 // very large allocations which would intersect many marked bytes
     99 // only result in a single call to PickNextSamplingPoint.
    100 //-------------------------------------------------------------------
    101 
    102 class PERFTOOLS_DLL_DECL Sampler {
    103  public:
    104   // Initialize this sampler.
    105   // Passing a seed of 0 gives a non-deterministic
    106   // seed value given by casting the object ("this")
    107   void Init(uint32_t seed);
    108   void Cleanup();
    109 
    110   // Record allocation of "k" bytes.  Return true iff allocation
    111   // should be sampled
    112   bool SampleAllocation(size_t k);
    113 
    114   // Generate a geometric with mean 512K (or FLAG_tcmalloc_sample_parameter)
    115   size_t PickNextSamplingPoint();
    116 
    117   // Initialize the statics for the Sampler class
    118   static void InitStatics();
    119 
    120   // Returns the current sample period
    121   int GetSamplePeriod();
    122 
    123   // The following are public for the purposes of testing
    124   static uint64_t NextRandom(uint64_t rnd_);  // Returns the next prng value
    125   static double FastLog2(const double & d);  // Computes Log2(x) quickly
    126   static void PopulateFastLog2Table();  // Populate the lookup table
    127 
    128  private:
    129   size_t        bytes_until_sample_;    // Bytes until we sample next
    130   uint64_t      rnd_;                   // Cheap random number generator
    131 
    132   // Statics for the fast log
    133   // Note that this code may not depend on anything in //util
    134   // hence the duplication of functionality here
    135   static const int kFastlogNumBits = 10;
    136   static const int kFastlogMask = (1 << kFastlogNumBits) - 1;
    137   static double log_table_[1<<kFastlogNumBits];  // Constant
    138 };
    139 
    140 inline bool Sampler::SampleAllocation(size_t k) {
    141   if (bytes_until_sample_ < k) {
    142     bytes_until_sample_ = PickNextSamplingPoint();
    143     return true;
    144   } else {
    145     bytes_until_sample_ -= k;
    146     return false;
    147   }
    148 }
    149 
    150 // Inline functions which are public for testing purposes
    151 
    152 // Returns the next prng value.
    153 // pRNG is: aX+b mod c with a = 0x5DEECE66D, b =  0xB, c = 1<<48
    154 // This is the lrand64 generator.
    155 inline uint64_t Sampler::NextRandom(uint64_t rnd) {
    156   const uint64_t prng_mult = 0x5DEECE66DLL;
    157   const uint64_t prng_add = 0xB;
    158   const uint64_t prng_mod_power = 48;
    159   const uint64_t prng_mod_mask =
    160                 ~((~static_cast<uint64_t>(0)) << prng_mod_power);
    161   return (prng_mult * rnd + prng_add) & prng_mod_mask;
    162 }
    163 
    164 // Adapted from //util/math/fastmath.[h|cc] by Noam Shazeer
    165 // This mimics the VeryFastLog2 code in those files
    166 inline double Sampler::FastLog2(const double & d) {
    167   ASSERT(d>0);
    168   COMPILE_ASSERT(sizeof(d) == sizeof(uint64_t), DoubleMustBe64Bits);
    169   uint64_t x;
    170   memcpy(&x, &d, sizeof(x));   // we depend on the compiler inlining this
    171   const uint32_t x_high = x >> 32;
    172   const uint32_t y = x_high >> (20 - kFastlogNumBits) & kFastlogMask;
    173   const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023;
    174   return exponent + log_table_[y];
    175 }
    176 
    177 }  // namespace tcmalloc
    178 
    179 #endif  // TCMALLOC_SAMPLER_H_
    180