#include <math.h>
#include "json.h"
#include "idletime.h"

static volatile struct idle_prof_common ipc;

/*
 * Get the time to complete a unit of work on a particular cpu.
 * The minimum time over CALIBRATE_RUNS runs is returned.
 */
static double calibrate_unit(unsigned char *data)
{
	unsigned long t, i, j, k;
	struct timeval tps;
	double tunit = 0.0;

	for (i = 0; i < CALIBRATE_RUNS; i++) {

		fio_gettime(&tps, NULL);
		/* scale for less variance */
		for (j = 0; j < CALIBRATE_SCALE; j++) {
			/* unit of work */
			for (k = 0; k < page_size; k++) {
				data[(k + j) % page_size] = k % 256;
				/*
				 * we won't see STOP here; this matches the
				 * same check in the profiling loop.
				 */
				if (ipc.status == IDLE_PROF_STATUS_PROF_STOP)
					return 0.0;
			}
		}

		t = utime_since_now(&tps);
		if (!t)
			continue;

		/* get the minimum time to complete CALIBRATE_SCALE units */
		if ((i == 0) || ((double)t < tunit))
			tunit = (double)t;
	}

	return tunit / CALIBRATE_SCALE;
}
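/*
 * Illustrative sketch (not compiled): how a calibrated unit time falls
 * out of the loop above. Assuming hypothetical values CALIBRATE_RUNS = 3
 * and CALIBRATE_SCALE = 100, and per-run timings of 520us, 510us and
 * 505us, the minimum (505us) is kept and divided by the scale, giving
 * roughly 5.05us per unit. Taking the minimum filters out runs that
 * were inflated by preemption or cache misses.
 */
#if 0
static double calibrate_unit_example(void)
{
	const double runs_us[] = { 520.0, 510.0, 505.0 };	/* hypothetical timings */
	double tmin = runs_us[0];
	int i;

	/* keep the fastest run, as calibrate_unit() does */
	for (i = 1; i < 3; i++)
		if (runs_us[i] < tmin)
			tmin = runs_us[i];

	return tmin / 100.0;	/* ~5.05us per unit of work */
}
#endif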
static int set_cpu_affinity(struct idle_prof_thread *ipt)
{
#if defined(FIO_HAVE_CPU_AFFINITY)
	os_cpu_mask_t cpu_mask;

	memset(&cpu_mask, 0, sizeof(cpu_mask));
	fio_cpu_set(&cpu_mask, ipt->cpu);

	if (fio_setaffinity(gettid(), cpu_mask)) {
		log_err("fio: fio_setaffinity failed\n");
		return -1;
	}

	return 0;
#else
	log_err("fio: fio_setaffinity not supported\n");
	return -1;
#endif
}

static void *idle_prof_thread_fn(void *data)
{
	int retval;
	unsigned long j, k;
	struct idle_prof_thread *ipt = data;

	/* wait until all threads are spawned */
	pthread_mutex_lock(&ipt->init_lock);

	/* exit if any other thread failed to start */
	if (ipc.status == IDLE_PROF_STATUS_ABORT) {
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	retval = set_cpu_affinity(ipt);
	if (retval == -1) {
		ipt->state = TD_EXITED;
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	ipt->cali_time = calibrate_unit(ipt->data);

	/* delay setting the IDLE class until now for better calibration accuracy */
#if defined(CONFIG_SCHED_IDLE)
	if ((retval = fio_set_sched_idle()))
		log_err("fio: fio_set_sched_idle failed\n");
#else
	retval = -1;
	log_err("fio: fio_set_sched_idle not supported\n");
#endif
	if (retval == -1) {
		ipt->state = TD_EXITED;
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	ipt->state = TD_INITIALIZED;

	/* signal the main thread that calibration is done */
	pthread_cond_signal(&ipt->cond);
	pthread_mutex_unlock(&ipt->init_lock);

	/* wait for the other calibrations to finish */
	pthread_mutex_lock(&ipt->start_lock);

	/* exit if other threads failed to initialize */
	if (ipc.status == IDLE_PROF_STATUS_ABORT) {
		pthread_mutex_unlock(&ipt->start_lock);
		return NULL;
	}

	/* exit if we are doing calibration only */
	if (ipc.status == IDLE_PROF_STATUS_CALI_STOP) {
		pthread_mutex_unlock(&ipt->start_lock);
		return NULL;
	}

	fio_gettime(&ipt->tps, NULL);
	ipt->state = TD_RUNNING;

	j = 0;
	while (1) {
		for (k = 0; k < page_size; k++) {
			ipt->data[(k + j) % page_size] = k % 256;
			if (ipc.status == IDLE_PROF_STATUS_PROF_STOP) {
				fio_gettime(&ipt->tpe, NULL);
				goto idle_prof_done;
			}
		}
		j++;
	}

idle_prof_done:

	ipt->loops = j + (double) k / page_size;
	ipt->state = TD_EXITED;
	pthread_mutex_unlock(&ipt->start_lock);

	return NULL;
}

/* calculate the mean and standard deviation of the time to complete a unit of work */
static void calibration_stats(void)
{
	int i;
	double sum = 0.0, var = 0.0;
	struct idle_prof_thread *ipt;

	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		sum += ipt->cali_time;
	}

	ipc.cali_mean = sum / ipc.nr_cpus;

	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		var += pow(ipt->cali_time - ipc.cali_mean, 2);
	}

	ipc.cali_stddev = sqrt(var / (ipc.nr_cpus - 1));
}
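/*
 * Illustrative sketch (not compiled): calibration_stats() above uses
 * the sample standard deviation, dividing by nr_cpus - 1 (Bessel's
 * correction). With hypothetical per-cpu unit times of 2.0us and
 * 4.0us, the mean is 3.0us and the stddev is sqrt((1 + 1) / 1), about
 * 1.41us.
 */
#if 0
static void calibration_stats_example(void)
{
	const double times_us[] = { 2.0, 4.0 };	/* hypothetical unit times */
	const int n = 2;
	double sum = 0.0, var = 0.0, mean;
	int i;

	for (i = 0; i < n; i++)
		sum += times_us[i];
	mean = sum / n;				/* 3.0 */

	for (i = 0; i < n; i++)
		var += pow(times_us[i] - mean, 2);

	/* prints mean=3.00 stddev=1.41 */
	log_info("mean=%3.2f stddev=%3.2f\n", mean, sqrt(var / (n - 1)));
}
#endif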
void fio_idle_prof_init(void)
{
	int i, ret;
	struct timeval tp;
	struct timespec ts;
	pthread_attr_t tattr;
	struct idle_prof_thread *ipt;

	ipc.nr_cpus = cpus_online();
	ipc.status = IDLE_PROF_STATUS_OK;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	if ((ret = pthread_attr_init(&tattr))) {
		log_err("fio: pthread_attr_init %s\n", strerror(ret));
		return;
	}
	if ((ret = pthread_attr_setscope(&tattr, PTHREAD_SCOPE_SYSTEM))) {
		log_err("fio: pthread_attr_setscope %s\n", strerror(ret));
		return;
	}

	ipc.ipts = malloc(ipc.nr_cpus * sizeof(struct idle_prof_thread));
	if (!ipc.ipts) {
		log_err("fio: malloc failed\n");
		return;
	}

	ipc.buf = malloc(ipc.nr_cpus * page_size);
	if (!ipc.buf) {
		log_err("fio: malloc failed\n");
		free(ipc.ipts);
		return;
	}

	/*
	 * profiling aborts on any single thread failure, since the
	 * result won't be accurate if any cpu is not used.
	 */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];

		ipt->cpu = i;
		ipt->state = TD_NOT_CREATED;
		ipt->data = (unsigned char *)(ipc.buf + page_size * i);

		if ((ret = pthread_mutex_init(&ipt->init_lock, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_mutex_init %s\n", strerror(ret));
			break;
		}

		if ((ret = pthread_mutex_init(&ipt->start_lock, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_mutex_init %s\n", strerror(ret));
			break;
		}

		if ((ret = pthread_cond_init(&ipt->cond, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_cond_init %s\n", strerror(ret));
			break;
		}

		/* make sure all threads are spawned before they start */
		pthread_mutex_lock(&ipt->init_lock);

		/* make sure all threads finish init before profiling starts */
		pthread_mutex_lock(&ipt->start_lock);

		if ((ret = pthread_create(&ipt->thread, &tattr, idle_prof_thread_fn, ipt))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_create %s\n", strerror(ret));
			break;
		} else
			ipt->state = TD_CREATED;

		if ((ret = pthread_detach(ipt->thread))) {
			/* log the error and let the thread spin */
			log_err("fio: pthread_detach %s\n", strerror(ret));
		}
	}

	/*
	 * let the good threads continue so that they can exit
	 * if errors occurred on other threads previously.
	 */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_unlock(&ipt->init_lock);
	}

	if (ipc.status == IDLE_PROF_STATUS_ABORT)
		return;

	/* wait for calibration to finish */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_lock(&ipt->init_lock);
		while ((ipt->state != TD_EXITED) &&
		       (ipt->state != TD_INITIALIZED)) {
			fio_gettime(&tp, NULL);
			ts.tv_sec = tp.tv_sec + 1;
			ts.tv_nsec = tp.tv_usec * 1000;
			pthread_cond_timedwait(&ipt->cond, &ipt->init_lock, &ts);
		}
		pthread_mutex_unlock(&ipt->init_lock);

		/*
		 * any thread that failed to initialize will cause the other
		 * threads to abort later, after fio_idle_prof_start.
		 */
		if (ipt->state == TD_EXITED)
			ipc.status = IDLE_PROF_STATUS_ABORT;
	}

	if (ipc.status != IDLE_PROF_STATUS_ABORT)
		calibration_stats();
	else
		ipc.cali_mean = ipc.cali_stddev = 0.0;

	if (ipc.opt == IDLE_PROF_OPT_CALI)
		ipc.status = IDLE_PROF_STATUS_CALI_STOP;
}

void fio_idle_prof_start(void)
{
	int i;
	struct idle_prof_thread *ipt;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	/* unlock regardless of whether abort is set */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_unlock(&ipt->start_lock);
	}
}

void fio_idle_prof_stop(void)
{
	int i;
	uint64_t runt;
	struct timeval tp;
	struct timespec ts;
	struct idle_prof_thread *ipt;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	if (ipc.opt == IDLE_PROF_OPT_CALI)
		return;

	ipc.status = IDLE_PROF_STATUS_PROF_STOP;

	/* wait for all threads to exit from profiling */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_lock(&ipt->start_lock);
		while ((ipt->state != TD_EXITED) &&
		       (ipt->state != TD_NOT_CREATED)) {
			fio_gettime(&tp, NULL);
			ts.tv_sec = tp.tv_sec + 1;
			ts.tv_nsec = tp.tv_usec * 1000;
			/* timed wait in case a signal is not received */
			pthread_cond_timedwait(&ipt->cond, &ipt->start_lock, &ts);
		}
		pthread_mutex_unlock(&ipt->start_lock);

		/* calculate idleness */
		if (ipc.cali_mean != 0.0) {
			runt = utime_since(&ipt->tps, &ipt->tpe);
			if (runt)
				ipt->idleness = ipt->loops * ipc.cali_mean / runt;
			else
				ipt->idleness = 0.0;
		} else
			ipt->idleness = 0.0;
	}

	/*
	 * memory allocations are freed via an explicit fio_idle_prof_cleanup
	 * call, after the profiling stats have been collected by apps.
	 */
}
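/*
 * Illustrative sketch (not compiled): how the idleness ratio computed
 * in fio_idle_prof_stop() works out. Assuming a hypothetical calibrated
 * unit time (cali_mean) of 0.5us, an idle-priority thread that
 * completed 1,000,000 units (loops) over a 1,000,000us window (runt)
 * held the cpu for roughly 500,000us, so its idleness is 0.5, reported
 * later as 50%.
 */
#if 0
static double idleness_example(void)
{
	const double loops = 1000000.0;		/* hypothetical unit count */
	const double cali_mean = 0.5;		/* hypothetical us per unit */
	const uint64_t runt = 1000000;		/* hypothetical wall time, us */

	return loops * cali_mean / runt;	/* 0.5, i.e. 50% idle */
}
#endif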
/*
 * Return the system idle percentage when cpu is -1;
 * return the idle percentage of a single cpu otherwise.
 */
static double fio_idle_prof_cpu_stat(int cpu)
{
	int i, nr_cpus = ipc.nr_cpus;
	struct idle_prof_thread *ipt;
	double p = 0.0;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return 0.0;

	if ((cpu >= nr_cpus) || (cpu < -1)) {
		log_err("fio: idle profiling invalid cpu index\n");
		return 0.0;
	}

	if (cpu == -1) {
		for (i = 0; i < nr_cpus; i++) {
			ipt = &ipc.ipts[i];
			p += ipt->idleness;
		}
		p /= nr_cpus;
	} else {
		ipt = &ipc.ipts[cpu];
		p = ipt->idleness;
	}

	return p * 100.0;
}

static void fio_idle_prof_cleanup(void)
{
	if (ipc.ipts) {
		free(ipc.ipts);
		ipc.ipts = NULL;
	}

	if (ipc.buf) {
		free(ipc.buf);
		ipc.buf = NULL;
	}
}

int fio_idle_prof_parse_opt(const char *args)
{
	ipc.opt = IDLE_PROF_OPT_NONE; /* default */

	if (!args) {
		log_err("fio: empty idle-prof option string\n");
		return -1;
	}

#if defined(FIO_HAVE_CPU_AFFINITY) && defined(CONFIG_SCHED_IDLE)
	if (strcmp("calibrate", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_CALI;
		fio_idle_prof_init();
		fio_idle_prof_start();
		fio_idle_prof_stop();
		show_idle_prof_stats(FIO_OUTPUT_NORMAL, NULL);
		return 1;
	} else if (strcmp("system", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_SYSTEM;
		return 0;
	} else if (strcmp("percpu", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_PERCPU;
		return 0;
	} else {
		log_err("fio: incorrect idle-prof option: %s\n", args);
		return -1;
	}
#else
	log_err("fio: idle-prof not supported on this platform\n");
	return -1;
#endif
}

void show_idle_prof_stats(int output, struct json_object *parent)
{
	int i, nr_cpus = ipc.nr_cpus;
	struct json_object *tmp;
	char s[MAX_CPU_STR_LEN];

	if (output == FIO_OUTPUT_NORMAL) {
		if (ipc.opt > IDLE_PROF_OPT_CALI)
			log_info("\nCPU idleness:\n");
		else if (ipc.opt == IDLE_PROF_OPT_CALI)
			log_info("CPU idleness:\n");

		if (ipc.opt >= IDLE_PROF_OPT_SYSTEM)
			log_info("  system: %3.2f%%\n", fio_idle_prof_cpu_stat(-1));

		if (ipc.opt == IDLE_PROF_OPT_PERCPU) {
			log_info("  percpu: %3.2f%%", fio_idle_prof_cpu_stat(0));
			for (i = 1; i < nr_cpus; i++)
				log_info(", %3.2f%%", fio_idle_prof_cpu_stat(i));
			log_info("\n");
		}

		if (ipc.opt >= IDLE_PROF_OPT_CALI) {
			log_info("  unit work: mean=%3.2fus,", ipc.cali_mean);
			log_info(" stddev=%3.2f\n", ipc.cali_stddev);
		}

		/* dynamic mem allocations can now be freed */
		if (ipc.opt != IDLE_PROF_OPT_NONE)
			fio_idle_prof_cleanup();

		return;
	}

	if ((ipc.opt != IDLE_PROF_OPT_NONE) && (output == FIO_OUTPUT_JSON)) {
		if (!parent)
			return;

		tmp = json_create_object();
		if (!tmp)
			return;

		json_object_add_value_object(parent, "cpu_idleness", tmp);
		json_object_add_value_float(tmp, "system", fio_idle_prof_cpu_stat(-1));

		if (ipc.opt == IDLE_PROF_OPT_PERCPU) {
			for (i = 0; i < nr_cpus; i++) {
				snprintf(s, MAX_CPU_STR_LEN, "cpu-%d", i);
				json_object_add_value_float(tmp, s, fio_idle_prof_cpu_stat(i));
			}
		}

		json_object_add_value_float(tmp, "unit_mean", ipc.cali_mean);
		json_object_add_value_float(tmp, "unit_stddev", ipc.cali_stddev);

		fio_idle_prof_cleanup();
	}
}
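/*
 * Illustrative sketch (not compiled): the expected call sequence for a
 * caller driving this profiler. Option parsing selects the mode, init
 * spawns and calibrates the per-cpu threads, start releases them onto
 * their idle-priority busy loops, stop collects the idleness results,
 * and show_idle_prof_stats both reports them and frees the allocations.
 * (The "calibrate" option is the exception: parse_opt runs the whole
 * cycle itself and returns 1.)
 */
#if 0
static void idle_prof_usage_example(void)
{
	if (fio_idle_prof_parse_opt("system") < 0)	/* or "percpu" */
		return;

	fio_idle_prof_init();
	fio_idle_prof_start();

	/* ... run the actual workload here ... */

	fio_idle_prof_stop();
	show_idle_prof_stats(FIO_OUTPUT_NORMAL, NULL);
}
#endif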