#include <math.h>
#include "json.h"
#include "idletime.h"

static volatile struct idle_prof_common ipc;

/*
 * Get the time to complete a unit of work on a particular cpu.
 * The minimum over CALIBRATE_RUNS runs is returned.
 */
static double calibrate_unit(unsigned char *data)
{
	unsigned long t, i, j, k;
	struct timeval tps;
	double tunit = 0.0;

	for (i = 0; i < CALIBRATE_RUNS; i++) {

		fio_gettime(&tps, NULL);
		/* scale for less variance */
		for (j = 0; j < CALIBRATE_SCALE; j++) {
			/* unit of work */
			for (k = 0; k < page_size; k++) {
				data[(k + j) % page_size] = k % 256;
				/*
				 * we won't see STOP here; this is to match
				 * the same statement in the profiling loop.
				 */
				if (ipc.status == IDLE_PROF_STATUS_PROF_STOP)
					return 0.0;
			}
		}

		t = utime_since_now(&tps);
		if (!t)
			continue;

		/* keep the minimum time to complete CALIBRATE_SCALE units */
		if ((i == 0) || ((double)t < tunit))
			tunit = (double)t;
	}

	return tunit / CALIBRATE_SCALE;
}

static void free_cpu_affinity(struct idle_prof_thread *ipt)
{
#if defined(FIO_HAVE_CPU_AFFINITY)
	fio_cpuset_exit(&ipt->cpu_mask);
#endif
}

static int set_cpu_affinity(struct idle_prof_thread *ipt)
{
#if defined(FIO_HAVE_CPU_AFFINITY)
	if (fio_cpuset_init(&ipt->cpu_mask)) {
		log_err("fio: cpuset init failed\n");
		return -1;
	}

	fio_cpu_set(&ipt->cpu_mask, ipt->cpu);

	if (fio_setaffinity(gettid(), ipt->cpu_mask)) {
		log_err("fio: fio_setaffinity failed\n");
		fio_cpuset_exit(&ipt->cpu_mask);
		return -1;
	}

	return 0;
#else
	log_err("fio: fio_setaffinity not supported\n");
	return -1;
#endif
}

static void *idle_prof_thread_fn(void *data)
{
	int retval;
	unsigned long j, k;
	struct idle_prof_thread *ipt = data;

	/* wait until all threads are spawned */
	pthread_mutex_lock(&ipt->init_lock);

	/* exit if any other thread failed to start */
	if (ipc.status == IDLE_PROF_STATUS_ABORT) {
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	retval = set_cpu_affinity(ipt);
	if (retval == -1) {
		ipt->state = TD_EXITED;
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	ipt->cali_time = calibrate_unit(ipt->data);

	/* delay setting the IDLE class until now for better calibration accuracy */
#if defined(CONFIG_SCHED_IDLE)
	if ((retval = fio_set_sched_idle()))
		log_err("fio: fio_set_sched_idle failed\n");
#else
	retval = -1;
	log_err("fio: fio_set_sched_idle not supported\n");
#endif
	if (retval == -1) {
		ipt->state = TD_EXITED;
		pthread_mutex_unlock(&ipt->init_lock);
		goto do_exit;
	}

	ipt->state = TD_INITIALIZED;

	/* signal the main thread that calibration is done */
	pthread_cond_signal(&ipt->cond);
	pthread_mutex_unlock(&ipt->init_lock);

	/* wait for the other threads to finish calibration */
	pthread_mutex_lock(&ipt->start_lock);

	/* exit if other threads failed to initialize */
	if (ipc.status == IDLE_PROF_STATUS_ABORT) {
		pthread_mutex_unlock(&ipt->start_lock);
		goto do_exit;
	}

	/* exit if we are doing calibration only */
	if (ipc.status == IDLE_PROF_STATUS_CALI_STOP) {
		pthread_mutex_unlock(&ipt->start_lock);
		goto do_exit;
	}

	fio_gettime(&ipt->tps, NULL);
	ipt->state = TD_RUNNING;

	/* profiling loop: do unit work until told to stop */
	j = 0;
	while (1) {
		for (k = 0; k < page_size; k++) {
			ipt->data[(k + j) % page_size] = k % 256;
			if (ipc.status == IDLE_PROF_STATUS_PROF_STOP) {
				fio_gettime(&ipt->tpe, NULL);
				goto idle_prof_done;
			}
		}
		j++;
	}

idle_prof_done:

	ipt->loops = j + (double) k / page_size;
	ipt->state = TD_EXITED;
	pthread_mutex_unlock(&ipt->start_lock);

do_exit:
	free_cpu_affinity(ipt);
	return NULL;
}

/* calculate mean and standard deviation of the time to complete a unit of work */
static void calibration_stats(void)
{
	int i;
	double sum = 0.0, var = 0.0;
	struct idle_prof_thread *ipt;

	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		sum += ipt->cali_time;
	}

	ipc.cali_mean = sum / ipc.nr_cpus;

	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		var += pow(ipt->cali_time - ipc.cali_mean, 2);
	}

	ipc.cali_stddev = sqrt(var / (ipc.nr_cpus - 1));
}

/* spawn one profiling thread per online cpu and run calibration */
void fio_idle_prof_init(void)
{
	int i, ret;
	struct timeval tp;
	struct timespec ts;
	pthread_attr_t tattr;
	struct idle_prof_thread *ipt;

	ipc.nr_cpus = cpus_online();
	ipc.status = IDLE_PROF_STATUS_OK;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	if ((ret = pthread_attr_init(&tattr))) {
		log_err("fio: pthread_attr_init %s\n", strerror(ret));
		return;
	}
	if ((ret = pthread_attr_setscope(&tattr, PTHREAD_SCOPE_SYSTEM))) {
		log_err("fio: pthread_attr_setscope %s\n", strerror(ret));
		return;
	}

	ipc.ipts = malloc(ipc.nr_cpus * sizeof(struct idle_prof_thread));
	if (!ipc.ipts) {
		log_err("fio: malloc failed\n");
		return;
	}

	ipc.buf = malloc(ipc.nr_cpus * page_size);
	if (!ipc.buf) {
		log_err("fio: malloc failed\n");
		free(ipc.ipts);
		return;
	}

	/*
	 * profiling aborts on any single thread failure since the
	 * result won't be accurate if any cpu is not used.
	 */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];

		ipt->cpu = i;
		ipt->state = TD_NOT_CREATED;
		ipt->data = (unsigned char *)(ipc.buf + page_size * i);

		if ((ret = pthread_mutex_init(&ipt->init_lock, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_mutex_init %s\n", strerror(ret));
			break;
		}

		if ((ret = pthread_mutex_init(&ipt->start_lock, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_mutex_init %s\n", strerror(ret));
			break;
		}

		if ((ret = pthread_cond_init(&ipt->cond, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_cond_init %s\n", strerror(ret));
			break;
		}

		/* make sure all threads are spawned before they start */
		pthread_mutex_lock(&ipt->init_lock);

		/* make sure all threads finish init before profiling starts */
		pthread_mutex_lock(&ipt->start_lock);

		if ((ret = pthread_create(&ipt->thread, &tattr, idle_prof_thread_fn, ipt))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_create %s\n", strerror(ret));
			break;
		} else
			ipt->state = TD_CREATED;

		if ((ret = pthread_detach(ipt->thread))) {
			/* log the error and let the thread spin */
			log_err("fio: pthread_detach %s\n", strerror(ret));
		}
	}

	/*
	 * let the good threads continue so that they can exit
	 * if errors occurred on other threads previously.
	 */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_unlock(&ipt->init_lock);
	}

	if (ipc.status == IDLE_PROF_STATUS_ABORT)
		return;

	/* wait for calibration to finish */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_lock(&ipt->init_lock);
		while ((ipt->state != TD_EXITED) &&
		       (ipt->state != TD_INITIALIZED)) {
			fio_gettime(&tp, NULL);
			ts.tv_sec = tp.tv_sec + 1;
			ts.tv_nsec = tp.tv_usec * 1000;
			pthread_cond_timedwait(&ipt->cond, &ipt->init_lock, &ts);
		}
		pthread_mutex_unlock(&ipt->init_lock);

		/*
		 * if any thread failed to initialize, the other threads are
		 * aborted later, after fio_idle_prof_start.
		 */
		if (ipt->state == TD_EXITED)
			ipc.status = IDLE_PROF_STATUS_ABORT;
	}

	if (ipc.status != IDLE_PROF_STATUS_ABORT)
		calibration_stats();
	else
		ipc.cali_mean = ipc.cali_stddev = 0.0;

	if (ipc.opt == IDLE_PROF_OPT_CALI)
		ipc.status = IDLE_PROF_STATUS_CALI_STOP;
}

/* release all profiling threads so they enter the profiling loop */
void fio_idle_prof_start(void)
{
	int i;
	struct idle_prof_thread *ipt;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	/* unlock regardless of whether abort is set */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_unlock(&ipt->start_lock);
	}
}

/* signal the profiling threads to stop and compute per-cpu idleness */
void fio_idle_prof_stop(void)
{
	int i;
	uint64_t runt;
	struct timeval tp;
	struct timespec ts;
	struct idle_prof_thread *ipt;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	if (ipc.opt == IDLE_PROF_OPT_CALI)
		return;

	ipc.status = IDLE_PROF_STATUS_PROF_STOP;

	/* wait for all threads to exit from profiling */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_lock(&ipt->start_lock);
		while ((ipt->state != TD_EXITED) &&
		       (ipt->state != TD_NOT_CREATED)) {
			fio_gettime(&tp, NULL);
			ts.tv_sec = tp.tv_sec + 1;
			ts.tv_nsec = tp.tv_usec * 1000;
			/* timed wait in case a signal is not received */
			pthread_cond_timedwait(&ipt->cond, &ipt->start_lock, &ts);
		}
		pthread_mutex_unlock(&ipt->start_lock);

		/* calculate idleness: time spent on unit work relative to the run time */
		if (ipc.cali_mean != 0.0) {
			runt = utime_since(&ipt->tps, &ipt->tpe);
			if (runt)
				ipt->idleness = ipt->loops * ipc.cali_mean / runt;
			else
				ipt->idleness = 0.0;
		} else
			ipt->idleness = 0.0;
	}

	/*
	 * memory allocations are freed via an explicit fio_idle_prof_cleanup
	 * call after the profiling stats have been collected by apps.
	 */
}

/*
 * return the system idle percentage when cpu is -1;
 * return the idle percentage of one cpu otherwise.
 */
static double fio_idle_prof_cpu_stat(int cpu)
{
	int i, nr_cpus = ipc.nr_cpus;
	struct idle_prof_thread *ipt;
	double p = 0.0;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return 0.0;

	if ((cpu >= nr_cpus) || (cpu < -1)) {
		log_err("fio: idle profiling invalid cpu index\n");
		return 0.0;
	}

	if (cpu == -1) {
		for (i = 0; i < nr_cpus; i++) {
			ipt = &ipc.ipts[i];
			p += ipt->idleness;
		}
		p /= nr_cpus;
	} else {
		ipt = &ipc.ipts[cpu];
		p = ipt->idleness;
	}

	return p * 100.0;
}

static void fio_idle_prof_cleanup(void)
{
	if (ipc.ipts) {
		free(ipc.ipts);
		ipc.ipts = NULL;
	}

	if (ipc.buf) {
		free(ipc.buf);
		ipc.buf = NULL;
	}
}

/* parse the idle-prof command line option */
int fio_idle_prof_parse_opt(const char *args)
{
	ipc.opt = IDLE_PROF_OPT_NONE; /* default */

	if (!args) {
		log_err("fio: empty idle-prof option string\n");
		return -1;
	}

#if defined(FIO_HAVE_CPU_AFFINITY) && defined(CONFIG_SCHED_IDLE)
	if (strcmp("calibrate", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_CALI;
		fio_idle_prof_init();
		fio_idle_prof_start();
		fio_idle_prof_stop();
		show_idle_prof_stats(FIO_OUTPUT_NORMAL, NULL);
		return 1;
	} else if (strcmp("system", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_SYSTEM;
		return 0;
	} else if (strcmp("percpu", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_PERCPU;
		return 0;
	} else {
		log_err("fio: incorrect idle-prof option: %s\n", args);
		return -1;
	}
#else
	log_err("fio: idle-prof not supported on this platform\n");
	return -1;
#endif
}

/* report idleness results in normal or JSON output format */
void show_idle_prof_stats(int output, struct json_object *parent)
{
	int i, nr_cpus = ipc.nr_cpus;
	struct json_object *tmp;
	char s[MAX_CPU_STR_LEN];

	if (output == FIO_OUTPUT_NORMAL) {
		if (ipc.opt > IDLE_PROF_OPT_CALI)
			log_info("\nCPU idleness:\n");
		else if (ipc.opt == IDLE_PROF_OPT_CALI)
			log_info("CPU idleness:\n");

		if (ipc.opt >= IDLE_PROF_OPT_SYSTEM)
			log_info(" system: %3.2f%%\n", fio_idle_prof_cpu_stat(-1));

		if (ipc.opt == IDLE_PROF_OPT_PERCPU) {
			log_info(" percpu: %3.2f%%", fio_idle_prof_cpu_stat(0));
			for (i = 1; i < nr_cpus; i++)
				log_info(", %3.2f%%", fio_idle_prof_cpu_stat(i));
			log_info("\n");
		}

		if (ipc.opt >= IDLE_PROF_OPT_CALI) {
			log_info(" unit work: mean=%3.2fus,", ipc.cali_mean);
			log_info(" stddev=%3.2f\n", ipc.cali_stddev);
		}

		/* dynamic memory allocations can now be freed */
		if (ipc.opt != IDLE_PROF_OPT_NONE)
			fio_idle_prof_cleanup();

		return;
	}

	if ((ipc.opt != IDLE_PROF_OPT_NONE) && (output == FIO_OUTPUT_JSON)) {
		if (!parent)
			return;

		tmp = json_create_object();
		if (!tmp)
			return;

		json_object_add_value_object(parent, "cpu_idleness", tmp);
		json_object_add_value_float(tmp, "system", fio_idle_prof_cpu_stat(-1));

		if (ipc.opt == IDLE_PROF_OPT_PERCPU) {
			for (i = 0; i < nr_cpus; i++) {
				snprintf(s, MAX_CPU_STR_LEN, "cpu-%d", i);
				json_object_add_value_float(tmp, s, fio_idle_prof_cpu_stat(i));
			}
		}

		json_object_add_value_float(tmp, "unit_mean", ipc.cali_mean);
		json_object_add_value_float(tmp, "unit_stddev", ipc.cali_stddev);

		fio_idle_prof_cleanup();
	}
}
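
#if 0
/*
 * Illustrative sketch only, not built as part of fio: one possible way a
 * caller could drive this module directly, mirroring the "calibrate"
 * branch of fio_idle_prof_parse_opt() above. It assumes the usual fio
 * environment (page_size, the log_* helpers, FIO_OUTPUT_NORMAL) is set up;
 * the function name idle_prof_example is hypothetical.
 */
static void idle_prof_example(void)
{
	/* select system-wide idleness reporting; non-zero means unsupported */
	if (fio_idle_prof_parse_opt("system"))
		return;

	fio_idle_prof_init();	/* spawn per-cpu threads and calibrate */
	fio_idle_prof_start();	/* release the threads at idle priority */

	/* ... run the workload to be measured here ... */

	fio_idle_prof_stop();	/* stop profiling and compute idleness */
	show_idle_prof_stats(FIO_OUTPUT_NORMAL, NULL);	/* also frees buffers */
}
#endif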