1 // Copyright (c) 2008, Google Inc. 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are 6 // met: 7 // 8 // * Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above 11 // copyright notice, this list of conditions and the following disclaimer 12 // in the documentation and/or other materials provided with the 13 // distribution. 14 // * Neither the name of Google Inc. nor the names of its 15 // contributors may be used to endorse or promote products derived from 16 // this software without specific prior written permission. 17 // 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 // --- 31 // All Rights Reserved. 
//
// Author: Daniel Ford

#ifndef TCMALLOC_SAMPLER_H_
#define TCMALLOC_SAMPLER_H_

#include "config.h"
#include <stddef.h>                     // for size_t
#ifdef HAVE_STDINT_H
#include <stdint.h>                     // for uint64_t, uint32_t, int32_t
#endif
#include <string.h>                     // for memcpy
#include "base/basictypes.h"  // for ASSERT
#include "internal_logging.h"  // for ASSERT

namespace tcmalloc {

//-------------------------------------------------------------------
// Sampler to decide when to create a sample trace for an allocation
// Not thread safe: Each thread should have its own sampler object.
// Caller must use external synchronization if used
// from multiple threads.
//
// With 512K average sample step (the default):
//  the probability of sampling a 4K allocation is about 0.00778
//  the probability of sampling a 1MB allocation is about 0.865
//  the probability of sampling a 1GB allocation is about 1.00000
// In general, the probability of sampling an allocation of size X
// given a flag value of Y (default 1M) is:
//  1 - e^(-X/Y)
//
// With 128K average sample step:
//  the probability of sampling a 1MB allocation is about 0.99966
//  the probability of sampling a 1GB allocation is about 1.0
//  (about 1 - 2**(-26))
// With 1M average sample step:
//  the probability of sampling a 4K allocation is about 0.00390
//  the probability of sampling a 1MB allocation is about 0.632
//  the probability of sampling a 1GB allocation is about 1.0
//
// The sampler works by representing memory as a long stream from
// which allocations are taken.  Some of the bytes in this stream are
// marked and if an allocation includes a marked byte then it is
// sampled.  Bytes are marked according to a Poisson point process
// with each byte being marked independently with probability
// p = 1/tcmalloc_sample_parameter.
This makes the probability 78 // of sampling an allocation of X bytes equal to the CDF of 79 // a geometric with mean tcmalloc_sample_parameter. (ie. the 80 // probability that at least one byte in the range is marked). This 81 // is accurately given by the CDF of the corresponding exponential 82 // distribution : 1 - e^(X/tcmalloc_sample_parameter_) 83 // Independence of the byte marking ensures independence of 84 // the sampling of each allocation. 85 // 86 // This scheme is implemented by noting that, starting from any 87 // fixed place, the number of bytes until the next marked byte 88 // is geometrically distributed. This number is recorded as 89 // bytes_until_sample_. Every allocation subtracts from this 90 // number until it is less than 0. When this happens the current 91 // allocation is sampled. 92 // 93 // When an allocation occurs, bytes_until_sample_ is reset to 94 // a new independtly sampled geometric number of bytes. The 95 // memoryless property of the point process means that this may 96 // be taken as the number of bytes after the end of the current 97 // allocation until the next marked byte. This ensures that 98 // very large allocations which would intersect many marked bytes 99 // only result in a single call to PickNextSamplingPoint. 100 //------------------------------------------------------------------- 101 102 class PERFTOOLS_DLL_DECL Sampler { 103 public: 104 // Initialize this sampler. 105 // Passing a seed of 0 gives a non-deterministic 106 // seed value given by casting the object ("this") 107 void Init(uint32_t seed); 108 void Cleanup(); 109 110 // Record allocation of "k" bytes. 
Return true iff allocation 111 // should be sampled 112 bool SampleAllocation(size_t k); 113 114 // Generate a geometric with mean 512K (or FLAG_tcmalloc_sample_parameter) 115 size_t PickNextSamplingPoint(); 116 117 // Initialize the statics for the Sampler class 118 static void InitStatics(); 119 120 // Returns the current sample period 121 int GetSamplePeriod(); 122 123 // The following are public for the purposes of testing 124 static uint64_t NextRandom(uint64_t rnd_); // Returns the next prng value 125 static double FastLog2(const double & d); // Computes Log2(x) quickly 126 static void PopulateFastLog2Table(); // Populate the lookup table 127 128 private: 129 size_t bytes_until_sample_; // Bytes until we sample next 130 uint64_t rnd_; // Cheap random number generator 131 132 // Statics for the fast log 133 // Note that this code may not depend on anything in //util 134 // hence the duplication of functionality here 135 static const int kFastlogNumBits = 10; 136 static const int kFastlogMask = (1 << kFastlogNumBits) - 1; 137 static double log_table_[1<<kFastlogNumBits]; // Constant 138 }; 139 140 inline bool Sampler::SampleAllocation(size_t k) { 141 if (bytes_until_sample_ < k) { 142 bytes_until_sample_ = PickNextSamplingPoint(); 143 return true; 144 } else { 145 bytes_until_sample_ -= k; 146 return false; 147 } 148 } 149 150 // Inline functions which are public for testing purposes 151 152 // Returns the next prng value. 153 // pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48 154 // This is the lrand64 generator. 
155 inline uint64_t Sampler::NextRandom(uint64_t rnd) { 156 const uint64_t prng_mult = 0x5DEECE66DLL; 157 const uint64_t prng_add = 0xB; 158 const uint64_t prng_mod_power = 48; 159 const uint64_t prng_mod_mask = 160 ~((~static_cast<uint64_t>(0)) << prng_mod_power); 161 return (prng_mult * rnd + prng_add) & prng_mod_mask; 162 } 163 164 // Adapted from //util/math/fastmath.[h|cc] by Noam Shazeer 165 // This mimics the VeryFastLog2 code in those files 166 inline double Sampler::FastLog2(const double & d) { 167 ASSERT(d>0); 168 COMPILE_ASSERT(sizeof(d) == sizeof(uint64_t), DoubleMustBe64Bits); 169 uint64_t x; 170 memcpy(&x, &d, sizeof(x)); // we depend on the compiler inlining this 171 const uint32_t x_high = x >> 32; 172 const uint32_t y = x_high >> (20 - kFastlogNumBits) & kFastlogMask; 173 const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023; 174 return exponent + log_table_[y]; 175 } 176 177 } // namespace tcmalloc 178 179 #endif // TCMALLOC_SAMPLER_H_ 180