// Benchmark that forks a set of binder worker processes which exchange
// BINDER_NOP transactions and reports throughput and latency percentiles.

#include <binder/Binder.h>
#include <binder/IBinder.h>
#include <binder/IPCThreadState.h>
#include <binder/IServiceManager.h>
#include <binder/ProcessState.h>
#include <string>
#include <cstring>
#include <cstdlib>
#include <cstdio>

#include <chrono>
#include <iostream>
#include <vector>
#include <tuple>

#include <unistd.h>
#include <sys/wait.h>

using namespace std;
using namespace android;

enum BinderWorkerServiceCode {
    BINDER_NOP = IBinder::FIRST_CALL_TRANSACTION,
};

#define ASSERT_TRUE(cond) \
    do { \
        if (!(cond)) { \
            cerr << __func__ << ":" << __LINE__ << " condition:" << #cond << " failed\n" << endl; \
            exit(EXIT_FAILURE); \
        } \
    } while (0)

class BinderWorkerService : public BBinder
{
public:
    BinderWorkerService() {}
    ~BinderWorkerService() {}
    virtual status_t onTransact(uint32_t code,
                                const Parcel& data, Parcel* reply,
                                uint32_t flags = 0) {
        (void)flags;
        (void)data;
        (void)reply;
        switch (code) {
        case BINDER_NOP:
            return NO_ERROR;
        default:
            return UNKNOWN_TRANSACTION;
        }
    }
};

class Pipe {
    int m_readFd;
    int m_writeFd;
    Pipe(int readFd, int writeFd) : m_readFd{readFd}, m_writeFd{writeFd} {}
    Pipe(const Pipe &) = delete;
    Pipe& operator=(const Pipe &) = delete;
    Pipe& operator=(const Pipe &&) = delete;
public:
    Pipe(Pipe&& rval) noexcept {
        m_readFd = rval.m_readFd;
        m_writeFd = rval.m_writeFd;
        rval.m_readFd = 0;
        rval.m_writeFd = 0;
    }
    ~Pipe() {
        if (m_readFd)
            close(m_readFd);
        if (m_writeFd)
            close(m_writeFd);
    }
    void signal() {
        bool val = true;
        int error = write(m_writeFd, &val, sizeof(val));
        ASSERT_TRUE(error >= 0);
    }
    void wait() {
        bool val = false;
        int error = read(m_readFd, &val, sizeof(val));
        ASSERT_TRUE(error >= 0);
    }
    template <typename T> void send(const T& v) {
        int error = write(m_writeFd, &v, sizeof(T));
        ASSERT_TRUE(error >= 0);
    }
    template <typename T> void recv(T& v) {
        int error = read(m_readFd, &v, sizeof(T));
        ASSERT_TRUE(error >= 0);
    }
    static tuple<Pipe, Pipe> createPipePair() {
        int a[2];
        int b[2];

        int error1 = pipe(a);
        int error2 = pipe(b);
        ASSERT_TRUE(error1 >= 0);
        ASSERT_TRUE(error2 >= 0);

        return make_tuple(Pipe(a[0], b[1]), Pipe(b[0], a[1]));
    }
};

// Histogram parameters: latencies are bucketed in nanoseconds up to max_time_bucket.
static const uint32_t num_buckets = 128;
static uint64_t max_time_bucket = 50ull * 1000000;
static uint64_t time_per_bucket = max_time_bucket / num_buckets;

struct ProcResults {
    uint64_t m_worst = 0;
    uint32_t m_buckets[num_buckets] = {0};
    uint64_t m_transactions = 0;
    uint64_t m_long_transactions = 0;
    uint64_t m_total_time = 0;
    uint64_t m_best = max_time_bucket;

    void add_time(uint64_t time) {
        if (time > max_time_bucket) {
            m_long_transactions++;
        }
        m_buckets[min(time, max_time_bucket - 1) / time_per_bucket] += 1;
        m_best = min(time, m_best);
        m_worst = max(time, m_worst);
        m_transactions += 1;
        m_total_time += time;
    }
    static ProcResults combine(const ProcResults& a, const ProcResults& b) {
        ProcResults ret;
        for (uint32_t i = 0; i < num_buckets; i++) {
            ret.m_buckets[i] = a.m_buckets[i] + b.m_buckets[i];
        }
        ret.m_worst = max(a.m_worst, b.m_worst);
        ret.m_best = min(a.m_best, b.m_best);
        ret.m_transactions = a.m_transactions + b.m_transactions;
        ret.m_long_transactions = a.m_long_transactions + b.m_long_transactions;
        ret.m_total_time = a.m_total_time + b.m_total_time;
        return ret;
    }
    void dump() {
        if (m_long_transactions > 0) {
            cout << 100.0 * m_long_transactions / m_transactions
                 << "% of transactions took longer than estimated max latency. "
                    "Consider setting -m to be higher than "
                 << m_worst / 1000 << " microseconds" << endl;
        }

        double best = (double)m_best / 1.0E6;
        double worst = (double)m_worst / 1.0E6;
        double average = (double)m_total_time / m_transactions / 1.0E6;
        cout << "average:" << average << "ms worst:" << worst << "ms best:" << best << "ms" << endl;

        uint64_t cur_total = 0;
        float time_per_bucket_ms = time_per_bucket / 1.0E6;
        for (uint32_t i = 0; i < num_buckets; i++) {
            float cur_time = time_per_bucket_ms * i + 0.5f * time_per_bucket_ms;
            if ((cur_total < 0.5f * m_transactions) && (cur_total + m_buckets[i] >= 0.5f * m_transactions)) {
                cout << "50%: " << cur_time << " ";
            }
            if ((cur_total < 0.9f * m_transactions) && (cur_total + m_buckets[i] >= 0.9f * m_transactions)) {
                cout << "90%: " << cur_time << " ";
            }
            if ((cur_total < 0.95f * m_transactions) && (cur_total + m_buckets[i] >= 0.95f * m_transactions)) {
                cout << "95%: " << cur_time << " ";
            }
            if ((cur_total < 0.99f * m_transactions) && (cur_total + m_buckets[i] >= 0.99f * m_transactions)) {
                cout << "99%: " << cur_time << " ";
            }
            cur_total += m_buckets[i];
        }
        cout << endl;
    }
};

String16 generateServiceName(int num)
{
    char num_str[32];
    snprintf(num_str, sizeof(num_str), "%d", num);
    String16 serviceName = String16("binderWorker") + String16(num_str);
    return serviceName;
}

void worker_fx(int num,
               int worker_count,
               int iterations,
               int payload_size,
               bool cs_pair,
               Pipe p)
{
    // Create and register a BinderWorkerService, then wait for the go signal.
    ProcessState::self()->startThreadPool();
    sp<IServiceManager> serviceMgr = defaultServiceManager();
    sp<BinderWorkerService> service = new BinderWorkerService;
    serviceMgr->addService(generateServiceName(num), service);

    srand(num);
    p.signal();
    p.wait();

    // With client/server pairs, half of the workers act as
    // servers and the other half as clients.
    int server_count = cs_pair ? worker_count / 2 : worker_count;

    // Get references to the other binder services.
    cout << "Created BinderWorker" << num << endl;
    vector<sp<IBinder> > workers;
    for (int i = 0; i < server_count; i++) {
        if (num == i)
            continue;
        workers.push_back(serviceMgr->getService(generateServiceName(i)));
    }

    // Run the benchmark if this worker is a client.
    ProcResults results;
    chrono::time_point<chrono::high_resolution_clock> start, end;
    for (int i = 0; (!cs_pair || num >= server_count) && i < iterations; i++) {
        Parcel data, reply;
        int target = cs_pair ? num % server_count : rand() % workers.size();
        int sz = payload_size;

        while (sz >= (int)sizeof(uint32_t)) {
            data.writeInt32(0);
            sz -= sizeof(uint32_t);
        }
        start = chrono::high_resolution_clock::now();
        status_t ret = workers[target]->transact(BINDER_NOP, data, &reply);
        end = chrono::high_resolution_clock::now();

        uint64_t cur_time = uint64_t(chrono::duration_cast<chrono::nanoseconds>(end - start).count());
        results.add_time(cur_time);

        if (ret != NO_ERROR) {
            cout << "thread " << num << " failed " << ret << " i : " << i << endl;
            exit(EXIT_FAILURE);
        }
    }

    // Signal completion to the master and wait.
    p.signal();
    p.wait();

    // Send results to the master and wait for the go signal to exit.
    p.send(results);
    p.wait();

    exit(EXIT_SUCCESS);
}

Pipe make_worker(int num, int iterations, int worker_count, int payload_size, bool cs_pair)
{
    auto pipe_pair = Pipe::createPipePair();
    pid_t pid = fork();
    if (pid) {
        /* parent */
        return move(get<0>(pipe_pair));
    } else {
        /* child */
        worker_fx(num, worker_count, iterations, payload_size, cs_pair, move(get<1>(pipe_pair)));
        /* never get here */
        return move(get<0>(pipe_pair));
    }
}

void wait_all(vector<Pipe>& v)
{
    for (size_t i = 0; i < v.size(); i++) {
        v[i].wait();
    }
}

void signal_all(vector<Pipe>& v)
{
    for (size_t i = 0; i < v.size(); i++) {
        v[i].signal();
    }
}

void run_main(int iterations,
              int workers,
              int payload_size,
              bool cs_pair,
              bool training_round = false)
{
    vector<Pipe> pipes;
    // Create all the workers and wait for them to spawn.
    for (int i = 0; i < workers; i++) {
        pipes.push_back(make_worker(i, iterations, workers, payload_size, cs_pair));
    }
    wait_all(pipes);

    // Run the workers and wait for completion.
    chrono::time_point<chrono::high_resolution_clock> start, end;
    cout << "waiting for workers to complete" << endl;
    start = chrono::high_resolution_clock::now();
    signal_all(pipes);
    wait_all(pipes);
    end = chrono::high_resolution_clock::now();

    // Calculate overall throughput.
    double iterations_per_sec = double(iterations * workers) /
        (chrono::duration_cast<chrono::nanoseconds>(end - start).count() / 1.0E9);
    cout << "iterations per sec: " << iterations_per_sec << endl;

    // Collect all results from the workers.
    cout << "collecting results" << endl;
    signal_all(pipes);
    ProcResults tot_results;
    for (int i = 0; i < workers; i++) {
        ProcResults tmp_results;
        pipes[i].recv(tmp_results);
        tot_results = ProcResults::combine(tot_results, tmp_results);
    }

    // Kill all the workers.
    cout << "killing workers" << endl;
    signal_all(pipes);
    for (int i = 0; i < workers; i++) {
        int status;
        wait(&status);
        if (status != 0) {
            cout << "nonzero child status " << status << endl;
        }
    }
    if (training_round) {
        // Set max_time_bucket to 2 * m_worst from the training round
        // and adjust time_per_bucket accordingly.
        max_time_bucket = 2 * tot_results.m_worst;
        time_per_bucket = max_time_bucket / num_buckets;
        cout << "Max latency during training: " << tot_results.m_worst / 1.0E6 << "ms" << endl;
    } else {
        tot_results.dump();
    }
}

int main(int argc, char *argv[])
{
    int workers = 2;
    int iterations = 10000;
    int payload_size = 0;
    bool cs_pair = false;
    bool training_round = false;

    // Parse arguments.
    for (int i = 1; i < argc; i++) {
        if (string(argv[i]) == "--help") {
            cout << "Usage: binderThroughputTest [OPTIONS]" << endl;
            cout << "\t-i N : Specify number of iterations." << endl;
            cout << "\t-m N : Specify expected max latency in microseconds." << endl;
            cout << "\t-p : Split workers into client/server pairs." << endl;
            cout << "\t-s N : Specify payload size." << endl;
            cout << "\t-t : Run a training round." << endl;
            cout << "\t-w N : Specify total number of workers." << endl;
            return 0;
        }
        if (string(argv[i]) == "-w") {
            workers = atoi(argv[i+1]);
            i++;
            continue;
        }
        if (string(argv[i]) == "-i") {
            iterations = atoi(argv[i+1]);
            i++;
            continue;
        }
        if (string(argv[i]) == "-s") {
            payload_size = atoi(argv[i+1]);
            i++;
            continue;
        }
        if (string(argv[i]) == "-p") {
            // Use client/server pairs instead of spreading requests
            // across all workers: half of the workers become clients
            // and the other half servers.
            cs_pair = true;
            continue;
        }
        if (string(argv[i]) == "-t") {
            // Run one training round before actually collecting data
            // to get an approximation of the max latency.
            training_round = true;
            continue;
        }
        if (string(argv[i]) == "-m") {
            // The caller specified the max latency in microseconds,
            // so there is no need to run a training round.
            if (atoi(argv[i+1]) > 0) {
                max_time_bucket = strtoull(argv[i+1], nullptr, 10) * 1000;
                time_per_bucket = max_time_bucket / num_buckets;
                i++;
                continue;
            } else {
                cout << "Max latency -m must be positive." << endl;
                exit(EXIT_FAILURE);
            }
        }
    }

    if (training_round) {
        cout << "Start training round" << endl;
        run_main(iterations, workers, payload_size, cs_pair, true /* training_round */);
        cout << "Completed training round" << endl << endl;
    }

    run_main(iterations, workers, payload_size, cs_pair);
    return 0;
}
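
// Illustrative invocations only: the binary name and flags below come from the
// usage text printed by --help above; the specific values (worker count,
// iteration count, payload size, latency bound) are arbitrary examples, not
// recommendations from this file.
//
//   # 4 workers, 10000 iterations each, 64-byte payloads, preceded by a training round:
//   binderThroughputTest -w 4 -i 10000 -s 64 -t
//
//   # Client/server pairs with a caller-supplied max latency of 2000 microseconds:
//   binderThroughputTest -w 8 -p -m 2000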