Home | History | Annotate | Download | only in threading
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/base_switches.h"
      6 #include "base/bind.h"
      7 #include "base/command_line.h"
      8 #include "base/memory/scoped_vector.h"
      9 #include "base/synchronization/condition_variable.h"
     10 #include "base/synchronization/lock.h"
     11 #include "base/synchronization/waitable_event.h"
     12 #include "base/threading/thread.h"
     13 #include "base/time/time.h"
     14 #include "build/build_config.h"
     15 #include "testing/gtest/include/gtest/gtest.h"
     16 #include "testing/perf/perf_test.h"
     17 
     18 #if defined(OS_POSIX)
     19 #include <pthread.h>
     20 #endif
     21 
     22 namespace base {
     23 
     24 namespace {
     25 
// Number of hops requested from PingPong() for each measurement. The reported
// per-hop times are the measured totals divided by this value.
const int kNumRuns = 100000;
     27 
     28 // Base class for a threading perf-test. This sets up some threads for the
     29 // test and measures the clock-time in addition to time spent on each thread.
     30 class ThreadPerfTest : public testing::Test {
     31  public:
     32   ThreadPerfTest()
     33       : done_(false, false) {
     34     // Disable the task profiler as it adds significant cost!
     35     CommandLine::Init(0, NULL);
     36     CommandLine::ForCurrentProcess()->AppendSwitchASCII(
     37         switches::kProfilerTiming,
     38         switches::kProfilerTimingDisabledValue);
     39   }
     40 
     41   // To be implemented by each test. Subclass must uses threads_ such that
     42   // their cpu-time can be measured. Test must return from PingPong() _and_
     43   // call FinishMeasurement from any thread to complete the test.
     44   virtual void Init() {}
     45   virtual void PingPong(int hops) = 0;
     46   virtual void Reset() {}
     47 
     48   void TimeOnThread(base::TimeTicks* ticks, base::WaitableEvent* done) {
     49     *ticks = base::TimeTicks::ThreadNow();
     50     done->Signal();
     51   }
     52 
     53   base::TimeTicks ThreadNow(base::Thread* thread) {
     54     base::WaitableEvent done(false, false);
     55     base::TimeTicks ticks;
     56     thread->message_loop_proxy()->PostTask(
     57         FROM_HERE,
     58         base::Bind(&ThreadPerfTest::TimeOnThread,
     59                    base::Unretained(this),
     60                    &ticks,
     61                    &done));
     62     done.Wait();
     63     return ticks;
     64   }
     65 
     66   void RunPingPongTest(const std::string& name, unsigned num_threads) {
     67     // Create threads and collect starting cpu-time for each thread.
     68     std::vector<base::TimeTicks> thread_starts;
     69     while (threads_.size() < num_threads) {
     70       threads_.push_back(new base::Thread("PingPonger"));
     71       threads_.back()->Start();
     72       if (base::TimeTicks::IsThreadNowSupported())
     73         thread_starts.push_back(ThreadNow(threads_.back()));
     74     }
     75 
     76     Init();
     77 
     78     base::TimeTicks start = base::TimeTicks::HighResNow();
     79     PingPong(kNumRuns);
     80     done_.Wait();
     81     base::TimeTicks end = base::TimeTicks::HighResNow();
     82 
     83     // Gather the cpu-time spent on each thread. This does one extra tasks,
     84     // but that should be in the noise given enough runs.
     85     base::TimeDelta thread_time;
     86     while (threads_.size()) {
     87       if (base::TimeTicks::IsThreadNowSupported()) {
     88         thread_time += ThreadNow(threads_.back()) - thread_starts.back();
     89         thread_starts.pop_back();
     90       }
     91       threads_.pop_back();
     92     }
     93 
     94     Reset();
     95 
     96     double num_runs = static_cast<double>(kNumRuns);
     97     double us_per_task_clock = (end - start).InMicroseconds() / num_runs;
     98     double us_per_task_cpu = thread_time.InMicroseconds() / num_runs;
     99 
    100     // Clock time per task.
    101     perf_test::PrintResult(
    102         "task", "", name + "_time ", us_per_task_clock, "us/hop", true);
    103 
    104     // Total utilization across threads if available (likely higher).
    105     if (base::TimeTicks::IsThreadNowSupported()) {
    106       perf_test::PrintResult(
    107           "task", "", name + "_cpu ", us_per_task_cpu, "us/hop", true);
    108     }
    109   }
    110 
    111  protected:
    112   void FinishMeasurement() { done_.Signal(); }
    113   ScopedVector<base::Thread> threads_;
    114 
    115  private:
    116   base::WaitableEvent done_;
    117 };
    118 
    119 // Class to test task performance by posting empty tasks back and forth.
    120 class TaskPerfTest : public ThreadPerfTest {
    121   base::Thread* NextThread(int count) {
    122     return threads_[count % threads_.size()];
    123   }
    124 
    125   virtual void PingPong(int hops) OVERRIDE {
    126     if (!hops) {
    127       FinishMeasurement();
    128       return;
    129     }
    130     NextThread(hops)->message_loop_proxy()->PostTask(
    131         FROM_HERE,
    132         base::Bind(
    133             &ThreadPerfTest::PingPong, base::Unretained(this), hops - 1));
    134   }
    135 };
    136 
// This tries to test the 'best-case' as well as the 'worst-case' task posting
// performance. The best-case keeps one thread alive such that it never yields,
// while the worst-case forces a context switch for every task. Four threads are
// used to ensure the threads do yield (with just two it might be possible for
// both threads to stay awake if they can signal each other fast enough).
TEST_F(TaskPerfTest, TaskPingPong) {
  // Best case: a single thread handles every hop.
  RunPingPongTest("1_Task_Threads", 1);
  // Worst case: hops rotate across four threads.
  RunPingPongTest("4_Task_Threads", 4);
}
    146 
    147 
    148 // Same as above, but add observers to test their perf impact.
    149 class MessageLoopObserver : public base::MessageLoop::TaskObserver {
    150  public:
    151   virtual void WillProcessTask(const base::PendingTask& pending_task) OVERRIDE {
    152   }
    153   virtual void DidProcessTask(const base::PendingTask& pending_task) OVERRIDE {
    154   }
    155 };
    156 MessageLoopObserver message_loop_observer;
    157 
    158 class TaskObserverPerfTest : public TaskPerfTest {
    159  public:
    160   virtual void Init() OVERRIDE {
    161     TaskPerfTest::Init();
    162     for (size_t i = 0; i < threads_.size(); i++) {
    163       threads_[i]->message_loop()->AddTaskObserver(&message_loop_observer);
    164     }
    165   }
    166 };
    167 
TEST_F(TaskObserverPerfTest, TaskPingPong) {
  // Same one- and four-thread runs as the TaskPerfTest version, reported
  // under separate trace names.
  RunPingPongTest("1_Task_Threads_With_Observer", 1);
  RunPingPongTest("4_Task_Threads_With_Observer", 4);
}
    172 
// Class to test our WaitableEvent performance by signaling back and forth.
    174 // WaitableEvent is templated so we can also compare with other versions.
    175 template <typename WaitableEventType>
    176 class EventPerfTest : public ThreadPerfTest {
    177  public:
    178   virtual void Init() OVERRIDE {
    179     for (size_t i = 0; i < threads_.size(); i++)
    180       events_.push_back(new WaitableEventType(false, false));
    181   }
    182 
    183   virtual void Reset() OVERRIDE { events_.clear(); }
    184 
    185   void WaitAndSignalOnThread(size_t event) {
    186     size_t next_event = (event + 1) % events_.size();
    187     int my_hops = 0;
    188     do {
    189       events_[event]->Wait();
    190       my_hops = --remaining_hops_;  // We own 'hops' between Wait and Signal.
    191       events_[next_event]->Signal();
    192     } while (my_hops > 0);
    193     // Once we are done, all threads will signal as hops passes zero.
    194     // We only signal completion once, on the thread that reaches zero.
    195     if (!my_hops)
    196       FinishMeasurement();
    197   }
    198 
    199   virtual void PingPong(int hops) OVERRIDE {
    200     remaining_hops_ = hops;
    201     for (size_t i = 0; i < threads_.size(); i++) {
    202       threads_[i]->message_loop_proxy()->PostTask(
    203           FROM_HERE,
    204           base::Bind(&EventPerfTest::WaitAndSignalOnThread,
    205                      base::Unretained(this),
    206                      i));
    207     }
    208 
    209     // Kick off the Signal ping-ponging.
    210     events_.front()->Signal();
    211   }
    212 
    213   int remaining_hops_;
    214   ScopedVector<WaitableEventType> events_;
    215 };
    216 
    217 // Similar to the task posting test, this just tests similar functionality
    218 // using WaitableEvents. We only test four threads (worst-case), but we
    219 // might want to craft a way to test the best-case (where the thread doesn't
    220 // end up blocking because the event is already signalled).
// EventPerfTest instantiated with base::WaitableEvent.
typedef EventPerfTest<base::WaitableEvent> WaitableEventPerfTest;
TEST_F(WaitableEventPerfTest, EventPingPong) {
  // Four threads passing the signal around.
  RunPingPongTest("4_WaitableEvent_Threads", 4);
}
    225 
    226 // Build a minimal event using ConditionVariable.
    227 class ConditionVariableEvent {
    228  public:
    229   ConditionVariableEvent(bool manual_reset, bool initially_signaled)
    230       : cond_(&lock_), signaled_(false) {
    231     DCHECK(!manual_reset);
    232     DCHECK(!initially_signaled);
    233   }
    234 
    235   void Signal() {
    236     {
    237       base::AutoLock scoped_lock(lock_);
    238       signaled_ = true;
    239     }
    240     cond_.Signal();
    241   }
    242 
    243   void Wait() {
    244     base::AutoLock scoped_lock(lock_);
    245     while (!signaled_)
    246       cond_.Wait();
    247     signaled_ = false;
    248   }
    249 
    250  private:
    251   base::Lock lock_;
    252   base::ConditionVariable cond_;
    253   bool signaled_;
    254 };
    255 
    256 // This is meant to test the absolute minimal context switching time
    257 // using our own base synchronization code.
// EventPerfTest instantiated with the ConditionVariable-based event.
typedef EventPerfTest<ConditionVariableEvent> ConditionVariablePerfTest;
TEST_F(ConditionVariablePerfTest, EventPingPong) {
  // Four threads passing the signal around.
  RunPingPongTest("4_ConditionVariable_Threads", 4);
}
    262 
    263 #if defined(OS_POSIX)
    264 
    265 // Absolutely 100% minimal posix waitable event. If there is a better/faster
    266 // way to force a context switch, we should use that instead.
    267 class PthreadEvent {
    268  public:
    269   PthreadEvent(bool manual_reset, bool initially_signaled) {
    270     DCHECK(!manual_reset);
    271     DCHECK(!initially_signaled);
    272     pthread_mutex_init(&mutex_, 0);
    273     pthread_cond_init(&cond_, 0);
    274     signaled_ = false;
    275   }
    276 
    277   ~PthreadEvent() {
    278     pthread_cond_destroy(&cond_);
    279     pthread_mutex_destroy(&mutex_);
    280   }
    281 
    282   void Signal() {
    283     pthread_mutex_lock(&mutex_);
    284     signaled_ = true;
    285     pthread_mutex_unlock(&mutex_);
    286     pthread_cond_signal(&cond_);
    287   }
    288 
    289   void Wait() {
    290     pthread_mutex_lock(&mutex_);
    291     while (!signaled_)
    292       pthread_cond_wait(&cond_, &mutex_);
    293     signaled_ = false;
    294     pthread_mutex_unlock(&mutex_);
    295   }
    296 
    297  private:
    298   bool signaled_;
    299   pthread_mutex_t mutex_;
    300   pthread_cond_t cond_;
    301 };
    302 
    303 // This is meant to test the absolute minimal context switching time.
    304 // If there is any faster way to do this we should substitute it in.
// EventPerfTest instantiated with the raw pthread-based event.
typedef EventPerfTest<PthreadEvent> PthreadEventPerfTest;
TEST_F(PthreadEventPerfTest, EventPingPong) {
  // Four threads passing the signal around.
  RunPingPongTest("4_PthreadCondVar_Threads", 4);
}
    309 
    310 #endif
    311 
    312 }  // namespace
    313 
    314 }  // namespace base
    315