1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/base_switches.h" 6 #include "base/bind.h" 7 #include "base/command_line.h" 8 #include "base/memory/scoped_vector.h" 9 #include "base/synchronization/condition_variable.h" 10 #include "base/synchronization/lock.h" 11 #include "base/synchronization/waitable_event.h" 12 #include "base/threading/thread.h" 13 #include "base/time/time.h" 14 #include "build/build_config.h" 15 #include "testing/gtest/include/gtest/gtest.h" 16 #include "testing/perf/perf_test.h" 17 18 #if defined(OS_POSIX) 19 #include <pthread.h> 20 #endif 21 22 namespace base { 23 24 namespace { 25 26 const int kNumRuns = 100000; 27 28 // Base class for a threading perf-test. This sets up some threads for the 29 // test and measures the clock-time in addition to time spent on each thread. 30 class ThreadPerfTest : public testing::Test { 31 public: 32 ThreadPerfTest() 33 : done_(false, false) { 34 // Disable the task profiler as it adds significant cost! 35 CommandLine::Init(0, NULL); 36 CommandLine::ForCurrentProcess()->AppendSwitchASCII( 37 switches::kProfilerTiming, 38 switches::kProfilerTimingDisabledValue); 39 } 40 41 // To be implemented by each test. Subclass must uses threads_ such that 42 // their cpu-time can be measured. Test must return from PingPong() _and_ 43 // call FinishMeasurement from any thread to complete the test. 44 virtual void Init() {} 45 virtual void PingPong(int hops) = 0; 46 virtual void Reset() {} 47 48 void TimeOnThread(base::TimeTicks* ticks, base::WaitableEvent* done) { 49 *ticks = base::TimeTicks::ThreadNow(); 50 done->Signal(); 51 } 52 53 base::TimeTicks ThreadNow(base::Thread* thread) { 54 base::WaitableEvent done(false, false); 55 base::TimeTicks ticks; 56 thread->message_loop_proxy()->PostTask( 57 FROM_HERE, 58 base::Bind(&ThreadPerfTest::TimeOnThread, 59 base::Unretained(this), 60 &ticks, 61 &done)); 62 done.Wait(); 63 return ticks; 64 } 65 66 void RunPingPongTest(const std::string& name, unsigned num_threads) { 67 // Create threads and collect starting cpu-time for each thread. 68 std::vector<base::TimeTicks> thread_starts; 69 while (threads_.size() < num_threads) { 70 threads_.push_back(new base::Thread("PingPonger")); 71 threads_.back()->Start(); 72 if (base::TimeTicks::IsThreadNowSupported()) 73 thread_starts.push_back(ThreadNow(threads_.back())); 74 } 75 76 Init(); 77 78 base::TimeTicks start = base::TimeTicks::HighResNow(); 79 PingPong(kNumRuns); 80 done_.Wait(); 81 base::TimeTicks end = base::TimeTicks::HighResNow(); 82 83 // Gather the cpu-time spent on each thread. This does one extra tasks, 84 // but that should be in the noise given enough runs. 85 base::TimeDelta thread_time; 86 while (threads_.size()) { 87 if (base::TimeTicks::IsThreadNowSupported()) { 88 thread_time += ThreadNow(threads_.back()) - thread_starts.back(); 89 thread_starts.pop_back(); 90 } 91 threads_.pop_back(); 92 } 93 94 Reset(); 95 96 double num_runs = static_cast<double>(kNumRuns); 97 double us_per_task_clock = (end - start).InMicroseconds() / num_runs; 98 double us_per_task_cpu = thread_time.InMicroseconds() / num_runs; 99 100 // Clock time per task. 101 perf_test::PrintResult( 102 "task", "", name + "_time ", us_per_task_clock, "us/hop", true); 103 104 // Total utilization across threads if available (likely higher). 105 if (base::TimeTicks::IsThreadNowSupported()) { 106 perf_test::PrintResult( 107 "task", "", name + "_cpu ", us_per_task_cpu, "us/hop", true); 108 } 109 } 110 111 protected: 112 void FinishMeasurement() { done_.Signal(); } 113 ScopedVector<base::Thread> threads_; 114 115 private: 116 base::WaitableEvent done_; 117 }; 118 119 // Class to test task performance by posting empty tasks back and forth. 120 class TaskPerfTest : public ThreadPerfTest { 121 base::Thread* NextThread(int count) { 122 return threads_[count % threads_.size()]; 123 } 124 125 virtual void PingPong(int hops) OVERRIDE { 126 if (!hops) { 127 FinishMeasurement(); 128 return; 129 } 130 NextThread(hops)->message_loop_proxy()->PostTask( 131 FROM_HERE, 132 base::Bind( 133 &ThreadPerfTest::PingPong, base::Unretained(this), hops - 1)); 134 } 135 }; 136 137 // This tries to test the 'best-case' as well as the 'worst-case' task posting 138 // performance. The best-case keeps one thread alive such that it never yeilds, 139 // while the worse-case forces a context switch for every task. Four threads are 140 // used to ensure the threads do yeild (with just two it might be possible for 141 // both threads to stay awake if they can signal each other fast enough). 142 TEST_F(TaskPerfTest, TaskPingPong) { 143 RunPingPongTest("1_Task_Threads", 1); 144 RunPingPongTest("4_Task_Threads", 4); 145 } 146 147 148 // Same as above, but add observers to test their perf impact. 149 class MessageLoopObserver : public base::MessageLoop::TaskObserver { 150 public: 151 virtual void WillProcessTask(const base::PendingTask& pending_task) OVERRIDE { 152 } 153 virtual void DidProcessTask(const base::PendingTask& pending_task) OVERRIDE { 154 } 155 }; 156 MessageLoopObserver message_loop_observer; 157 158 class TaskObserverPerfTest : public TaskPerfTest { 159 public: 160 virtual void Init() OVERRIDE { 161 TaskPerfTest::Init(); 162 for (size_t i = 0; i < threads_.size(); i++) { 163 threads_[i]->message_loop()->AddTaskObserver(&message_loop_observer); 164 } 165 } 166 }; 167 168 TEST_F(TaskObserverPerfTest, TaskPingPong) { 169 RunPingPongTest("1_Task_Threads_With_Observer", 1); 170 RunPingPongTest("4_Task_Threads_With_Observer", 4); 171 } 172 173 // Class to test our WaitableEvent performance by signaling back and fort. 174 // WaitableEvent is templated so we can also compare with other versions. 175 template <typename WaitableEventType> 176 class EventPerfTest : public ThreadPerfTest { 177 public: 178 virtual void Init() OVERRIDE { 179 for (size_t i = 0; i < threads_.size(); i++) 180 events_.push_back(new WaitableEventType(false, false)); 181 } 182 183 virtual void Reset() OVERRIDE { events_.clear(); } 184 185 void WaitAndSignalOnThread(size_t event) { 186 size_t next_event = (event + 1) % events_.size(); 187 int my_hops = 0; 188 do { 189 events_[event]->Wait(); 190 my_hops = --remaining_hops_; // We own 'hops' between Wait and Signal. 191 events_[next_event]->Signal(); 192 } while (my_hops > 0); 193 // Once we are done, all threads will signal as hops passes zero. 194 // We only signal completion once, on the thread that reaches zero. 195 if (!my_hops) 196 FinishMeasurement(); 197 } 198 199 virtual void PingPong(int hops) OVERRIDE { 200 remaining_hops_ = hops; 201 for (size_t i = 0; i < threads_.size(); i++) { 202 threads_[i]->message_loop_proxy()->PostTask( 203 FROM_HERE, 204 base::Bind(&EventPerfTest::WaitAndSignalOnThread, 205 base::Unretained(this), 206 i)); 207 } 208 209 // Kick off the Signal ping-ponging. 210 events_.front()->Signal(); 211 } 212 213 int remaining_hops_; 214 ScopedVector<WaitableEventType> events_; 215 }; 216 217 // Similar to the task posting test, this just tests similar functionality 218 // using WaitableEvents. We only test four threads (worst-case), but we 219 // might want to craft a way to test the best-case (where the thread doesn't 220 // end up blocking because the event is already signalled). 221 typedef EventPerfTest<base::WaitableEvent> WaitableEventPerfTest; 222 TEST_F(WaitableEventPerfTest, EventPingPong) { 223 RunPingPongTest("4_WaitableEvent_Threads", 4); 224 } 225 226 // Build a minimal event using ConditionVariable. 227 class ConditionVariableEvent { 228 public: 229 ConditionVariableEvent(bool manual_reset, bool initially_signaled) 230 : cond_(&lock_), signaled_(false) { 231 DCHECK(!manual_reset); 232 DCHECK(!initially_signaled); 233 } 234 235 void Signal() { 236 { 237 base::AutoLock scoped_lock(lock_); 238 signaled_ = true; 239 } 240 cond_.Signal(); 241 } 242 243 void Wait() { 244 base::AutoLock scoped_lock(lock_); 245 while (!signaled_) 246 cond_.Wait(); 247 signaled_ = false; 248 } 249 250 private: 251 base::Lock lock_; 252 base::ConditionVariable cond_; 253 bool signaled_; 254 }; 255 256 // This is meant to test the absolute minimal context switching time 257 // using our own base synchronization code. 258 typedef EventPerfTest<ConditionVariableEvent> ConditionVariablePerfTest; 259 TEST_F(ConditionVariablePerfTest, EventPingPong) { 260 RunPingPongTest("4_ConditionVariable_Threads", 4); 261 } 262 263 #if defined(OS_POSIX) 264 265 // Absolutely 100% minimal posix waitable event. If there is a better/faster 266 // way to force a context switch, we should use that instead. 267 class PthreadEvent { 268 public: 269 PthreadEvent(bool manual_reset, bool initially_signaled) { 270 DCHECK(!manual_reset); 271 DCHECK(!initially_signaled); 272 pthread_mutex_init(&mutex_, 0); 273 pthread_cond_init(&cond_, 0); 274 signaled_ = false; 275 } 276 277 ~PthreadEvent() { 278 pthread_cond_destroy(&cond_); 279 pthread_mutex_destroy(&mutex_); 280 } 281 282 void Signal() { 283 pthread_mutex_lock(&mutex_); 284 signaled_ = true; 285 pthread_mutex_unlock(&mutex_); 286 pthread_cond_signal(&cond_); 287 } 288 289 void Wait() { 290 pthread_mutex_lock(&mutex_); 291 while (!signaled_) 292 pthread_cond_wait(&cond_, &mutex_); 293 signaled_ = false; 294 pthread_mutex_unlock(&mutex_); 295 } 296 297 private: 298 bool signaled_; 299 pthread_mutex_t mutex_; 300 pthread_cond_t cond_; 301 }; 302 303 // This is meant to test the absolute minimal context switching time. 304 // If there is any faster way to do this we should substitute it in. 305 typedef EventPerfTest<PthreadEvent> PthreadEventPerfTest; 306 TEST_F(PthreadEventPerfTest, EventPingPong) { 307 RunPingPongTest("4_PthreadCondVar_Threads", 4); 308 } 309 310 #endif 311 312 } // namespace 313 314 } // namespace base 315