1 // Mini-benchmark for tsan VTS worst case performance 2 // Idea: 3 // 1) Spawn M + N threads (M >> N) 4 // We'll call the 'M' threads as 'garbage threads'. 5 // 2) Make sure all threads have created thus no TIDs were reused 6 // 3) Join the garbage threads 7 // 4) Do many sync operations on the remaining N threads 8 // 9 // It turns out that due to O(M+N) VTS complexity the (4) is much slower with 10 // when N is large. 11 // 12 // Some numbers: 13 // a) clang++ native O1 with n_iterations=200kk takes 14 // 5s regardless of M 15 // clang++ tsanv2 O1 with n_iterations=20kk takes 16 // 23.5s with M=200 17 // 11.5s with M=1 18 // i.e. tsanv2 is ~23x to ~47x slower than native, depends on M. 19 // b) g++ native O1 with n_iterations=200kk takes 20 // 5.5s regardless of M 21 // g++ tsanv1 O1 with n_iterations=2kk takes 22 // 39.5s with M=200 23 // 20.5s with M=1 24 // i.e. tsanv1 is ~370x to ~720x slower than native, depends on M. 25 26 #include <assert.h> 27 #include <pthread.h> 28 #include <stdio.h> 29 #include <stdlib.h> 30 31 class __attribute__((aligned(64))) Mutex { 32 public: 33 Mutex() { pthread_mutex_init(&m_, NULL); } 34 ~Mutex() { pthread_mutex_destroy(&m_); } 35 void Lock() { pthread_mutex_lock(&m_); } 36 void Unlock() { pthread_mutex_unlock(&m_); } 37 38 private: 39 pthread_mutex_t m_; 40 }; 41 42 const int kNumMutexes = 1024; 43 Mutex mutexes[kNumMutexes]; 44 45 int n_threads, n_iterations; 46 47 pthread_barrier_t all_threads_ready, main_threads_ready; 48 49 void* GarbageThread(void *unused) { 50 pthread_barrier_wait(&all_threads_ready); 51 return 0; 52 } 53 54 void *Thread(void *arg) { 55 long idx = (long)arg; 56 pthread_barrier_wait(&all_threads_ready); 57 58 // Wait for the main thread to join the garbage threads. 59 pthread_barrier_wait(&main_threads_ready); 60 61 printf("Thread %ld go!\n", idx); 62 int offset = idx * kNumMutexes / n_threads; 63 for (int i = 0; i < n_iterations; i++) { 64 mutexes[(offset + i) % kNumMutexes].Lock(); 65 mutexes[(offset + i) % kNumMutexes].Unlock(); 66 } 67 printf("Thread %ld done\n", idx); 68 return 0; 69 } 70 71 int main(int argc, char **argv) { 72 int n_garbage_threads; 73 if (argc == 1) { 74 n_threads = 2; 75 n_garbage_threads = 200; 76 n_iterations = 20000000; 77 } else if (argc == 4) { 78 n_threads = atoi(argv[1]); 79 assert(n_threads > 0 && n_threads <= 32); 80 n_garbage_threads = atoi(argv[2]); 81 assert(n_garbage_threads > 0 && n_garbage_threads <= 16000); 82 n_iterations = atoi(argv[3]); 83 } else { 84 printf("Usage: %s n_threads n_garbage_threads n_iterations\n", argv[0]); 85 return 1; 86 } 87 printf("%s: n_threads=%d n_garbage_threads=%d n_iterations=%d\n", 88 __FILE__, n_threads, n_garbage_threads, n_iterations); 89 90 pthread_barrier_init(&all_threads_ready, NULL, n_garbage_threads + n_threads + 1); 91 pthread_barrier_init(&main_threads_ready, NULL, n_threads + 1); 92 93 pthread_t *t = new pthread_t[n_threads]; 94 { 95 pthread_t *g_t = new pthread_t[n_garbage_threads]; 96 for (int i = 0; i < n_garbage_threads; i++) { 97 int status = pthread_create(&g_t[i], 0, GarbageThread, NULL); 98 assert(status == 0); 99 } 100 for (int i = 0; i < n_threads; i++) { 101 int status = pthread_create(&t[i], 0, Thread, (void*)i); 102 assert(status == 0); 103 } 104 pthread_barrier_wait(&all_threads_ready); 105 printf("All threads started! Killing the garbage threads.\n"); 106 for (int i = 0; i < n_garbage_threads; i++) { 107 pthread_join(g_t[i], 0); 108 } 109 delete [] g_t; 110 } 111 printf("Resuming the main threads.\n"); 112 pthread_barrier_wait(&main_threads_ready); 113 114 115 for (int i = 0; i < n_threads; i++) { 116 pthread_join(t[i], 0); 117 } 118 delete [] t; 119 return 0; 120 } 121