Home | History | Annotate | Download | only in profiling
      1 // Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // instrumentation.h: contains the definitions needed to
     16 // instrument code for profiling:
     17 //   ScopedProfilingLabel, RegisterCurrentThreadForProfiling.
     18 //
     19 // profiler.h is only needed to drive the profiler:
     20 //   StartProfiling, FinishProfiling.
     21 //
     22 // See the usage example in profiler.h.
     23 
     24 #ifndef GEMMLOWP_PROFILING_INSTRUMENTATION_H_
     25 #define GEMMLOWP_PROFILING_INSTRUMENTATION_H_
     26 
     27 #include <cstdio>
     28 
     29 #ifndef GEMMLOWP_USE_STLPORT
     30 #include <cstdint>
     31 #else
     32 #include <stdint.h>
     33 namespace std {
     34 using ::int16_t;
     35 using ::int32_t;
     36 using ::int8_t;
     37 using ::size_t;
     38 using ::uint16_t;
     39 using ::uint32_t;
     40 using ::uint8_t;
     41 using ::uintptr_t;
     42 }  // namespace std
     43 #endif
     44 
     45 #include <algorithm>
     46 #include <cassert>
     47 #include <cstdlib>
     48 
     49 #ifdef GEMMLOWP_PROFILING
     50 #include <cstring>
     51 #include <set>
     52 #endif
     53 
     54 #include "./pthread_everywhere.h"
     55 
     56 namespace gemmlowp {
     57 
     58 inline void ReleaseBuildAssertion(bool condition, const char* msg) {
     59   if (!condition) {
     60     fprintf(stderr, "gemmlowp error: %s\n", msg);
     61     abort();
     62   }
     63 }
     64 
     65 class Mutex {
     66  public:
     67   Mutex(const Mutex&) = delete;
     68   Mutex& operator=(const Mutex&) = delete;
     69 
     70   Mutex() { pthread_mutex_init(&m, NULL); }
     71   ~Mutex() { pthread_mutex_destroy(&m); }
     72 
     73   void Lock() { pthread_mutex_lock(&m); }
     74   void Unlock() { pthread_mutex_unlock(&m); }
     75 
     76  private:
     77   pthread_mutex_t m;
     78 };
     79 
     80 class GlobalMutexes {
     81  public:
     82   static Mutex* Profiler() {
     83     static Mutex m;
     84     return &m;
     85   }
     86 
     87   static Mutex* EightBitIntGemm() {
     88     static Mutex m;
     89     return &m;
     90   }
     91 };
     92 
     93 // A very simple RAII helper to lock and unlock a Mutex
     94 struct ScopedLock {
     95   ScopedLock(Mutex* m) : _m(m) { _m->Lock(); }
     96   ~ScopedLock() { _m->Unlock(); }
     97 
     98  private:
     99   Mutex* _m;
    100 };
    101 
    102 // Profiling definitions. Two paths: when profiling is enabled,
    103 // and when profiling is disabled.
    104 #ifdef GEMMLOWP_PROFILING
    105 // This code path is when profiling is enabled.
    106 
    107 // A pseudo-call-stack. Contrary to a real call-stack, this only
    108 // contains pointers to literal strings that were manually entered
    109 // in the instrumented code (see ScopedProfilingLabel).
    110 struct ProfilingStack {
    111   static const std::size_t kMaxSize = 14;
    112   typedef const char* LabelsArrayType[kMaxSize];
    113   LabelsArrayType labels;
    114   std::size_t size;
    115   Mutex* lock;
    116 
    117   ProfilingStack() { memset(this, 0, sizeof(ProfilingStack)); }
    118 
    119   void Push(const char* label) {
    120     ScopedLock sl(lock);
    121     ReleaseBuildAssertion(size < kMaxSize, "ProfilingStack overflow");
    122     labels[size] = label;
    123     size++;
    124   }
    125 
    126   void Pop() {
    127     ScopedLock sl(lock);
    128     ReleaseBuildAssertion(size > 0, "ProfilingStack underflow");
    129     size--;
    130   }
    131 
    132   void UpdateTop(const char* new_label) {
    133     ScopedLock sl(lock);
    134     assert(size);
    135     labels[size - 1] = new_label;
    136   }
    137 
    138   ProfilingStack& operator=(const ProfilingStack& other) {
    139     memcpy(this, &other, sizeof(ProfilingStack));
    140     return *this;
    141   }
    142 
    143   bool operator==(const ProfilingStack& other) const {
    144     return !memcmp(this, &other, sizeof(ProfilingStack));
    145   }
    146 };
    147 
    148 static_assert(
    149     !(sizeof(ProfilingStack) & (sizeof(ProfilingStack) - 1)),
    150     "ProfilingStack should have power-of-two size to fit in cache lines");
    151 
    152 struct ThreadInfo;
    153 
    154 // The global set of threads being profiled.
    155 inline std::set<ThreadInfo*>& ThreadsUnderProfiling() {
    156   static std::set<ThreadInfo*> v;
    157   return v;
    158 }
    159 
    160 struct ThreadInfo {
    161   pthread_key_t key;  // used only to get a callback at thread exit.
    162   ProfilingStack stack;
    163 
    164   ThreadInfo() {
    165     pthread_key_create(&key, ThreadExitCallback);
    166     pthread_setspecific(key, this);
    167     stack.lock = new Mutex();
    168   }
    169 
    170   static void ThreadExitCallback(void* ptr) {
    171     ScopedLock sl(GlobalMutexes::Profiler());
    172     ThreadInfo* self = static_cast<ThreadInfo*>(ptr);
    173     ThreadsUnderProfiling().erase(self);
    174     pthread_key_delete(self->key);
    175     delete self->stack.lock;
    176   }
    177 };
    178 
    179 inline ThreadInfo& ThreadLocalThreadInfo() {
    180   static pthread_key_t key;
    181   static auto DeleteThreadInfo = [](void* threadInfoPtr) {
    182     ThreadInfo* threadInfo = static_cast<ThreadInfo*>(threadInfoPtr);
    183     if (threadInfo) {
    184       delete threadInfo;
    185     }
    186   };
    187 
    188   static int key_result = pthread_key_create(&key, DeleteThreadInfo);
    189 
    190   ThreadInfo* threadInfo = static_cast<ThreadInfo*>(pthread_getspecific(key));
    191   if (!threadInfo) {
    192     threadInfo = new ThreadInfo();
    193     pthread_setspecific(key, threadInfo);
    194   }
    195   return *threadInfo;
    196 }
    197 
    198 // ScopedProfilingLabel is how one instruments code for profiling
    199 // with this profiler. Construct local ScopedProfilingLabel variables,
    200 // passing a literal string describing the local code. Profile
    201 // samples will then be annotated with this label, while it is in scope
    202 // (whence the name --- also known as RAII).
    203 // See the example in profiler.h.
    204 class ScopedProfilingLabel {
    205   ProfilingStack* profiling_stack_;
    206 
    207  public:
    208   explicit ScopedProfilingLabel(const char* label)
    209       : profiling_stack_(&ThreadLocalThreadInfo().stack) {
    210     profiling_stack_->Push(label);
    211   }
    212 
    213   ~ScopedProfilingLabel() { profiling_stack_->Pop(); }
    214 
    215   void Update(const char* new_label) { profiling_stack_->UpdateTop(new_label); }
    216 };
    217 
    218 // To be called once on each thread to be profiled.
    219 inline void RegisterCurrentThreadForProfiling() {
    220   ScopedLock sl(GlobalMutexes::Profiler());
    221   ThreadsUnderProfiling().insert(&ThreadLocalThreadInfo());
    222 }
    223 
    224 #else  // not GEMMLOWP_PROFILING
    225 // This code path is when profiling is disabled.
    226 
    227 // This empty definition of ScopedProfilingLabel ensures that
    228 // it has zero runtime overhead when profiling is disabled.
    229 struct ScopedProfilingLabel {
    230   explicit ScopedProfilingLabel(const char*) {}
    231   void Update(const char*) {}
    232 };
    233 
    234 inline void RegisterCurrentThreadForProfiling() {}
    235 
    236 #endif
    237 
    238 }  // end namespace gemmlowp
    239 
    240 #endif  // GEMMLOWP_PROFILING_INSTRUMENTATION_H_
    241