1 // Copyright 2015 The Gemmlowp Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // instrumentation.h: contains the definitions needed to 16 // instrument code for profiling: 17 // ScopedProfilingLabel, RegisterCurrentThreadForProfiling. 18 // 19 // profiler.h is only needed to drive the profiler: 20 // StartProfiling, FinishProfiling. 21 // 22 // See the usage example in profiler.h. 23 24 #ifndef GEMMLOWP_PROFILING_INSTRUMENTATION_H_ 25 #define GEMMLOWP_PROFILING_INSTRUMENTATION_H_ 26 27 #include <cstdio> 28 29 #ifndef GEMMLOWP_USE_STLPORT 30 #include <cstdint> 31 #else 32 #include <stdint.h> 33 namespace std { 34 using ::int16_t; 35 using ::int32_t; 36 using ::int8_t; 37 using ::size_t; 38 using ::uint16_t; 39 using ::uint32_t; 40 using ::uint8_t; 41 using ::uintptr_t; 42 } // namespace std 43 #endif 44 45 #include <algorithm> 46 #include <cassert> 47 #include <cstdlib> 48 49 #ifdef GEMMLOWP_PROFILING 50 #include <cstring> 51 #include <set> 52 #endif 53 54 #include "./pthread_everywhere.h" 55 56 namespace gemmlowp { 57 58 inline void ReleaseBuildAssertion(bool condition, const char* msg) { 59 if (!condition) { 60 fprintf(stderr, "gemmlowp error: %s\n", msg); 61 abort(); 62 } 63 } 64 65 class Mutex { 66 public: 67 Mutex(const Mutex&) = delete; 68 Mutex& operator=(const Mutex&) = delete; 69 70 Mutex() { pthread_mutex_init(&m, NULL); } 71 ~Mutex() { pthread_mutex_destroy(&m); } 72 73 void Lock() { pthread_mutex_lock(&m); } 74 void Unlock() { pthread_mutex_unlock(&m); } 75 76 private: 77 pthread_mutex_t m; 78 }; 79 80 class GlobalMutexes { 81 public: 82 static Mutex* Profiler() { 83 static Mutex m; 84 return &m; 85 } 86 87 static Mutex* EightBitIntGemm() { 88 static Mutex m; 89 return &m; 90 } 91 }; 92 93 // A very simple RAII helper to lock and unlock a Mutex 94 struct ScopedLock { 95 ScopedLock(Mutex* m) : _m(m) { _m->Lock(); } 96 ~ScopedLock() { _m->Unlock(); } 97 98 private: 99 Mutex* _m; 100 }; 101 102 // Profiling definitions. Two paths: when profiling is enabled, 103 // and when profiling is disabled. 104 #ifdef GEMMLOWP_PROFILING 105 // This code path is when profiling is enabled. 106 107 // A pseudo-call-stack. Contrary to a real call-stack, this only 108 // contains pointers to literal strings that were manually entered 109 // in the instrumented code (see ScopedProfilingLabel). 110 struct ProfilingStack { 111 static const std::size_t kMaxSize = 14; 112 typedef const char* LabelsArrayType[kMaxSize]; 113 LabelsArrayType labels; 114 std::size_t size; 115 Mutex* lock; 116 117 ProfilingStack() { memset(this, 0, sizeof(ProfilingStack)); } 118 119 void Push(const char* label) { 120 ScopedLock sl(lock); 121 ReleaseBuildAssertion(size < kMaxSize, "ProfilingStack overflow"); 122 labels[size] = label; 123 size++; 124 } 125 126 void Pop() { 127 ScopedLock sl(lock); 128 ReleaseBuildAssertion(size > 0, "ProfilingStack underflow"); 129 size--; 130 } 131 132 void UpdateTop(const char* new_label) { 133 ScopedLock sl(lock); 134 assert(size); 135 labels[size - 1] = new_label; 136 } 137 138 ProfilingStack& operator=(const ProfilingStack& other) { 139 memcpy(this, &other, sizeof(ProfilingStack)); 140 return *this; 141 } 142 143 bool operator==(const ProfilingStack& other) const { 144 return !memcmp(this, &other, sizeof(ProfilingStack)); 145 } 146 }; 147 148 static_assert( 149 !(sizeof(ProfilingStack) & (sizeof(ProfilingStack) - 1)), 150 "ProfilingStack should have power-of-two size to fit in cache lines"); 151 152 struct ThreadInfo; 153 154 // The global set of threads being profiled. 155 inline std::set<ThreadInfo*>& ThreadsUnderProfiling() { 156 static std::set<ThreadInfo*> v; 157 return v; 158 } 159 160 struct ThreadInfo { 161 pthread_key_t key; // used only to get a callback at thread exit. 162 ProfilingStack stack; 163 164 ThreadInfo() { 165 pthread_key_create(&key, ThreadExitCallback); 166 pthread_setspecific(key, this); 167 stack.lock = new Mutex(); 168 } 169 170 static void ThreadExitCallback(void* ptr) { 171 ScopedLock sl(GlobalMutexes::Profiler()); 172 ThreadInfo* self = static_cast<ThreadInfo*>(ptr); 173 ThreadsUnderProfiling().erase(self); 174 pthread_key_delete(self->key); 175 delete self->stack.lock; 176 } 177 }; 178 179 inline ThreadInfo& ThreadLocalThreadInfo() { 180 static pthread_key_t key; 181 static auto DeleteThreadInfo = [](void* threadInfoPtr) { 182 ThreadInfo* threadInfo = static_cast<ThreadInfo*>(threadInfoPtr); 183 if (threadInfo) { 184 delete threadInfo; 185 } 186 }; 187 188 static int key_result = pthread_key_create(&key, DeleteThreadInfo); 189 190 ThreadInfo* threadInfo = static_cast<ThreadInfo*>(pthread_getspecific(key)); 191 if (!threadInfo) { 192 threadInfo = new ThreadInfo(); 193 pthread_setspecific(key, threadInfo); 194 } 195 return *threadInfo; 196 } 197 198 // ScopedProfilingLabel is how one instruments code for profiling 199 // with this profiler. Construct local ScopedProfilingLabel variables, 200 // passing a literal string describing the local code. Profile 201 // samples will then be annotated with this label, while it is in scope 202 // (whence the name --- also known as RAII). 203 // See the example in profiler.h. 204 class ScopedProfilingLabel { 205 ProfilingStack* profiling_stack_; 206 207 public: 208 explicit ScopedProfilingLabel(const char* label) 209 : profiling_stack_(&ThreadLocalThreadInfo().stack) { 210 profiling_stack_->Push(label); 211 } 212 213 ~ScopedProfilingLabel() { profiling_stack_->Pop(); } 214 215 void Update(const char* new_label) { profiling_stack_->UpdateTop(new_label); } 216 }; 217 218 // To be called once on each thread to be profiled. 219 inline void RegisterCurrentThreadForProfiling() { 220 ScopedLock sl(GlobalMutexes::Profiler()); 221 ThreadsUnderProfiling().insert(&ThreadLocalThreadInfo()); 222 } 223 224 #else // not GEMMLOWP_PROFILING 225 // This code path is when profiling is disabled. 226 227 // This empty definition of ScopedProfilingLabel ensures that 228 // it has zero runtime overhead when profiling is disabled. 229 struct ScopedProfilingLabel { 230 explicit ScopedProfilingLabel(const char*) {} 231 void Update(const char*) {} 232 }; 233 234 inline void RegisterCurrentThreadForProfiling() {} 235 236 #endif 237 238 } // end namespace gemmlowp 239 240 #endif // GEMMLOWP_PROFILING_INSTRUMENTATION_H_ 241