Home | History | Annotate | Download | only in benchmarks
      1 // Mini-benchmark for tsan VTS worst case performance
      2 // Idea:
      3 // 1) Spawn M + N threads (M >> N)
      4 //    We'll call the 'M' threads as 'garbage threads'.
      5 // 2) Make sure all threads have created thus no TIDs were reused
      6 // 3) Join the garbage threads
      7 // 4) Do many sync operations on the remaining N threads
      8 //
      9 // It turns out that due to O(M+N) VTS complexity the (4) is much slower with
     10 // when N is large.
     11 //
     12 // Some numbers:
     13 // a) clang++ native O1 with n_iterations=200kk takes
     14 //      5s regardless of M
     15 //    clang++ tsanv2 O1 with n_iterations=20kk takes
     16 //      23.5s with M=200
     17 //      11.5s with M=1
     18 //    i.e. tsanv2 is ~23x to ~47x slower than native, depends on M.
     19 // b) g++ native O1 with n_iterations=200kk takes
     20 //      5.5s regardless of M
     21 //    g++ tsanv1 O1 with n_iterations=2kk takes
     22 //      39.5s with M=200
     23 //      20.5s with M=1
     24 //    i.e. tsanv1 is ~370x to ~720x slower than native, depends on M.
     25 
     26 #include <assert.h>
     27 #include <pthread.h>
     28 #include <stdio.h>
     29 #include <stdlib.h>
     30 
     31 class __attribute__((aligned(64))) Mutex {
     32  public:
     33   Mutex()  { pthread_mutex_init(&m_, NULL); }
     34   ~Mutex() { pthread_mutex_destroy(&m_); }
     35   void Lock() { pthread_mutex_lock(&m_); }
     36   void Unlock() { pthread_mutex_unlock(&m_); }
     37 
     38  private:
     39   pthread_mutex_t m_;
     40 };
     41 
     42 const int kNumMutexes = 1024;
     43 Mutex mutexes[kNumMutexes];
     44 
     45 int n_threads, n_iterations;
     46 
     47 pthread_barrier_t all_threads_ready, main_threads_ready;
     48 
     49 void* GarbageThread(void *unused) {
     50   pthread_barrier_wait(&all_threads_ready);
     51   return 0;
     52 }
     53 
     54 void *Thread(void *arg) {
     55   long idx = (long)arg;
     56   pthread_barrier_wait(&all_threads_ready);
     57 
     58   // Wait for the main thread to join the garbage threads.
     59   pthread_barrier_wait(&main_threads_ready);
     60 
     61   printf("Thread %ld go!\n", idx);
     62   int offset = idx * kNumMutexes / n_threads;
     63   for (int i = 0; i < n_iterations; i++) {
     64     mutexes[(offset + i) % kNumMutexes].Lock();
     65     mutexes[(offset + i) % kNumMutexes].Unlock();
     66   }
     67   printf("Thread %ld done\n", idx);
     68   return 0;
     69 }
     70 
     71 int main(int argc, char **argv) {
     72   int n_garbage_threads;
     73   if (argc == 1) {
     74     n_threads = 2;
     75     n_garbage_threads = 200;
     76     n_iterations = 20000000;
     77   } else if (argc == 4) {
     78     n_threads = atoi(argv[1]);
     79     assert(n_threads > 0 && n_threads <= 32);
     80     n_garbage_threads = atoi(argv[2]);
     81     assert(n_garbage_threads > 0 && n_garbage_threads <= 16000);
     82     n_iterations = atoi(argv[3]);
     83   } else {
     84     printf("Usage: %s n_threads n_garbage_threads n_iterations\n", argv[0]);
     85     return 1;
     86   }
     87   printf("%s: n_threads=%d n_garbage_threads=%d n_iterations=%d\n",
     88          __FILE__, n_threads, n_garbage_threads, n_iterations);
     89 
     90   pthread_barrier_init(&all_threads_ready, NULL, n_garbage_threads + n_threads + 1);
     91   pthread_barrier_init(&main_threads_ready, NULL, n_threads + 1);
     92 
     93   pthread_t *t = new pthread_t[n_threads];
     94   {
     95     pthread_t *g_t = new pthread_t[n_garbage_threads];
     96     for (int i = 0; i < n_garbage_threads; i++) {
     97       int status = pthread_create(&g_t[i], 0, GarbageThread, NULL);
     98       assert(status == 0);
     99     }
    100     for (int i = 0; i < n_threads; i++) {
    101       int status = pthread_create(&t[i], 0, Thread, (void*)i);
    102       assert(status == 0);
    103     }
    104     pthread_barrier_wait(&all_threads_ready);
    105     printf("All threads started! Killing the garbage threads.\n");
    106     for (int i = 0; i < n_garbage_threads; i++) {
    107       pthread_join(g_t[i], 0);
    108     }
    109     delete [] g_t;
    110   }
    111   printf("Resuming the main threads.\n");
    112   pthread_barrier_wait(&main_threads_ready);
    113 
    114 
    115   for (int i = 0; i < n_threads; i++) {
    116     pthread_join(t[i], 0);
    117   }
    118   delete [] t;
    119   return 0;
    120 }
    121