#include <math.h>
#include "json.h"
#include "idletime.h"

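/*
 * Shared state for the idle profiler. The status field is written by
 * the main thread and polled from the profiling loops, hence volatile.
 */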
static volatile struct idle_prof_common ipc;

/*
 * Get the time to complete a unit of work on a particular CPU.
 * The minimum time over CALIBRATE_RUNS runs is returned.
 */
static double calibrate_unit(unsigned char *data)
{
	unsigned long t, i, j, k;
	struct timeval tps;
	double tunit = 0.0;

	for (i = 0; i < CALIBRATE_RUNS; i++) {

		fio_gettime(&tps, NULL);
		/* scale for less variance */
		for (j = 0; j < CALIBRATE_SCALE; j++) {
			/* unit of work */
			for (k = 0; k < page_size; k++) {
				data[(k + j) % page_size] = k % 256;
				/*
				 * We won't see STOP here; this check exists
				 * only to match the same statement in the
				 * profiling loop, so both loops have the
				 * same cost per unit.
				 */
				if (ipc.status == IDLE_PROF_STATUS_PROF_STOP)
					return 0.0;
			}
		}

		t = utime_since_now(&tps);
		if (!t)
			continue;

		/* get the minimum time to complete CALIBRATE_SCALE units */
		if ((i == 0) || ((double)t < tunit))
			tunit = (double)t;
	}

	return tunit / CALIBRATE_SCALE;
}

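/*
 * Pin the calling thread to the CPU this profiling thread owns, so
 * that each worker measures exactly one CPU.
 */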
static int set_cpu_affinity(struct idle_prof_thread *ipt)
{
#if defined(FIO_HAVE_CPU_AFFINITY)
	os_cpu_mask_t cpu_mask;

	memset(&cpu_mask, 0, sizeof(cpu_mask));
	fio_cpu_set(&cpu_mask, ipt->cpu);

	if (fio_setaffinity(gettid(), cpu_mask)) {
		log_err("fio: fio_setaffinity failed\n");
		return -1;
	}

	return 0;
#else
	log_err("fio: fio_setaffinity not supported\n");
	return -1;
#endif
}

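/*
 * Per-CPU worker: calibrate the unit of work at normal priority, then
 * switch to the idle scheduling class and spin on the same work,
 * counting how many loops complete before being told to stop.
 */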
static void *idle_prof_thread_fn(void *data)
{
	int retval;
	unsigned long j, k;
	struct idle_prof_thread *ipt = data;

	/* wait until all threads are spawned */
	pthread_mutex_lock(&ipt->init_lock);

	/* exit if any other thread failed to start */
	if (ipc.status == IDLE_PROF_STATUS_ABORT) {
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	retval = set_cpu_affinity(ipt);
	if (retval == -1) {
		ipt->state = TD_EXITED;
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	ipt->cali_time = calibrate_unit(ipt->data);

	/* delay setting the IDLE class until now, for better calibration accuracy */
#if defined(CONFIG_SCHED_IDLE)
	if ((retval = fio_set_sched_idle()))
		log_err("fio: fio_set_sched_idle failed\n");
#else
	retval = -1;
	log_err("fio: fio_set_sched_idle not supported\n");
#endif
	if (retval == -1) {
		ipt->state = TD_EXITED;
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	ipt->state = TD_INITIALIZED;

	/* signal the main thread that calibration is done */
	pthread_cond_signal(&ipt->cond);
	pthread_mutex_unlock(&ipt->init_lock);

	/* wait for the other threads to finish calibration */
	pthread_mutex_lock(&ipt->start_lock);

	/* exit if other threads failed to initialize */
	if (ipc.status == IDLE_PROF_STATUS_ABORT) {
		pthread_mutex_unlock(&ipt->start_lock);
		return NULL;
	}

	/* exit if we are doing calibration only */
	if (ipc.status == IDLE_PROF_STATUS_CALI_STOP) {
		pthread_mutex_unlock(&ipt->start_lock);
		return NULL;
	}

	fio_gettime(&ipt->tps, NULL);
	ipt->state = TD_RUNNING;

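	/*
	 * The profiling loop repeats the calibrated unit of work; j counts
	 * completed passes over the page and k records the partial pass in
	 * progress when the stop signal arrives.
	 */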
	j = 0;
	while (1) {
		for (k = 0; k < page_size; k++) {
			ipt->data[(k + j) % page_size] = k % 256;
			if (ipc.status == IDLE_PROF_STATUS_PROF_STOP) {
				fio_gettime(&ipt->tpe, NULL);
				goto idle_prof_done;
			}
		}
		j++;
	}

idle_prof_done:

	ipt->loops = j + (double) k / page_size;
	ipt->state = TD_EXITED;
	pthread_mutex_unlock(&ipt->start_lock);

	return NULL;
}

/* calculate the mean and standard deviation of the unit-of-work time */
static void calibration_stats(void)
{
	int i;
	double sum = 0.0, var = 0.0;
	struct idle_prof_thread *ipt;

	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		sum += ipt->cali_time;
	}

	ipc.cali_mean = sum / ipc.nr_cpus;

	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		var += pow(ipt->cali_time - ipc.cali_mean, 2);
	}

	/* sample stddev; guard against dividing by zero on a single-CPU box */
	if (ipc.nr_cpus > 1)
		ipc.cali_stddev = sqrt(var / (ipc.nr_cpus - 1));
	else
		ipc.cali_stddev = 0.0;
}

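/*
 * Spawn one profiling thread per online CPU and run calibration on
 * each. Any single failure aborts the whole profile, since the result
 * is only meaningful when every CPU is covered.
 */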
void fio_idle_prof_init(void)
{
	int i, ret;
	struct timeval tp;
	struct timespec ts;
	pthread_attr_t tattr;
	struct idle_prof_thread *ipt;

	ipc.nr_cpus = cpus_online();
	ipc.status = IDLE_PROF_STATUS_OK;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	if ((ret = pthread_attr_init(&tattr))) {
		log_err("fio: pthread_attr_init %s\n", strerror(ret));
		return;
	}
	if ((ret = pthread_attr_setscope(&tattr, PTHREAD_SCOPE_SYSTEM))) {
		log_err("fio: pthread_attr_setscope %s\n", strerror(ret));
		return;
	}

	ipc.ipts = malloc(ipc.nr_cpus * sizeof(struct idle_prof_thread));
	if (!ipc.ipts) {
		log_err("fio: malloc failed\n");
		return;
	}

	ipc.buf = malloc(ipc.nr_cpus * page_size);
	if (!ipc.buf) {
		log_err("fio: malloc failed\n");
		free(ipc.ipts);
		return;
	}

	/*
	 * profiling aborts on any single thread failure, since the
	 * result won't be accurate if any cpu is not used.
	 */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];

		ipt->cpu = i;
		ipt->state = TD_NOT_CREATED;
		ipt->data = (unsigned char *)(ipc.buf + page_size * i);

		if ((ret = pthread_mutex_init(&ipt->init_lock, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_mutex_init %s\n", strerror(ret));
			break;
		}

		if ((ret = pthread_mutex_init(&ipt->start_lock, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_mutex_init %s\n", strerror(ret));
			break;
		}

		if ((ret = pthread_cond_init(&ipt->cond, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_cond_init %s\n", strerror(ret));
			break;
		}

		/* make sure all threads are spawned before they start */
		pthread_mutex_lock(&ipt->init_lock);

		/* make sure all threads finish init before profiling starts */
		pthread_mutex_lock(&ipt->start_lock);

		if ((ret = pthread_create(&ipt->thread, &tattr, idle_prof_thread_fn, ipt))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_create %s\n", strerror(ret));
			break;
		} else
			ipt->state = TD_CREATED;

		if ((ret = pthread_detach(ipt->thread))) {
			/* log the error and let the thread spin */
			log_err("fio: pthread_detach %s\n", strerror(ret));
		}
	}

	/*
	 * let the good threads continue, so that they can exit
	 * if errors occurred on other threads previously.
	 */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_unlock(&ipt->init_lock);
	}

	if (ipc.status == IDLE_PROF_STATUS_ABORT)
		return;

	/* wait for calibration to finish */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_lock(&ipt->init_lock);
		while ((ipt->state != TD_EXITED) &&
		       (ipt->state != TD_INITIALIZED)) {
			fio_gettime(&tp, NULL);
			ts.tv_sec = tp.tv_sec + 1;
			ts.tv_nsec = tp.tv_usec * 1000;
			pthread_cond_timedwait(&ipt->cond, &ipt->init_lock, &ts);
		}
		pthread_mutex_unlock(&ipt->init_lock);

		/*
		 * any thread that failed to initialize will make the other
		 * threads abort later, after fio_idle_prof_start.
		 */
		if (ipt->state == TD_EXITED)
			ipc.status = IDLE_PROF_STATUS_ABORT;
	}

	if (ipc.status != IDLE_PROF_STATUS_ABORT)
		calibration_stats();
	else
		ipc.cali_mean = ipc.cali_stddev = 0.0;

	if (ipc.opt == IDLE_PROF_OPT_CALI)
		ipc.status = IDLE_PROF_STATUS_CALI_STOP;
}

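/* release all profiling threads so they enter their profiling loops */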
void fio_idle_prof_start(void)
{
	int i;
	struct idle_prof_thread *ipt;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	/* unlock regardless of whether abort is set */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_unlock(&ipt->start_lock);
	}
}

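/*
 * Signal the profiling threads to stop, wait for them to exit, and
 * convert each thread's loop count into an idleness figure.
 */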
void fio_idle_prof_stop(void)
{
	int i;
	uint64_t runt;
	struct timeval tp;
	struct timespec ts;
	struct idle_prof_thread *ipt;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	if (ipc.opt == IDLE_PROF_OPT_CALI)
		return;

	ipc.status = IDLE_PROF_STATUS_PROF_STOP;

	/* wait for all threads to exit from profiling */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_lock(&ipt->start_lock);
		while ((ipt->state != TD_EXITED) &&
		       (ipt->state != TD_NOT_CREATED)) {
			fio_gettime(&tp, NULL);
			ts.tv_sec = tp.tv_sec + 1;
			ts.tv_nsec = tp.tv_usec * 1000;
			/* timed wait in case a signal is not received */
			pthread_cond_timedwait(&ipt->cond, &ipt->start_lock, &ts);
		}
		pthread_mutex_unlock(&ipt->start_lock);

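		/*
		 * calculate idleness: loops * cali_mean is the time this
		 * thread would have needed to do its work at full speed,
		 * so dividing by the wall-clock runtime gives the fraction
		 * of the cpu that was left over for idle-priority work.
		 */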
		if (ipc.cali_mean != 0.0) {
			runt = utime_since(&ipt->tps, &ipt->tpe);
			if (runt)
				ipt->idleness = ipt->loops * ipc.cali_mean / runt;
			else
				ipt->idleness = 0.0;
		} else
			ipt->idleness = 0.0;
	}

	/*
	 * memory allocations are freed via an explicit fio_idle_prof_cleanup
	 * call, after the profiling stats have been collected by apps.
	 */
}

/*
 * return the system-wide idle percentage when cpu is -1;
 * return the idle percentage of a single cpu otherwise.
 */
static double fio_idle_prof_cpu_stat(int cpu)
{
	int i, nr_cpus = ipc.nr_cpus;
	struct idle_prof_thread *ipt;
	double p = 0.0;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return 0.0;

	if ((cpu >= nr_cpus) || (cpu < -1)) {
		log_err("fio: idle profiling invalid cpu index\n");
		return 0.0;
	}

	if (cpu == -1) {
		for (i = 0; i < nr_cpus; i++) {
			ipt = &ipc.ipts[i];
			p += ipt->idleness;
		}
		p /= nr_cpus;
	} else {
		ipt = &ipc.ipts[cpu];
		p = ipt->idleness;
	}

	return p * 100.0;
}

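/* free the thread table and the per-cpu work buffers */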
static void fio_idle_prof_cleanup(void)
{
	if (ipc.ipts) {
		free(ipc.ipts);
		ipc.ipts = NULL;
	}

	if (ipc.buf) {
		free(ipc.buf);
		ipc.buf = NULL;
	}
}

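/*
 * Parse the idle-prof option string. Returns 1 when fio should exit
 * after a calibrate-only run, 0 on success, and -1 on bad input or an
 * unsupported platform.
 */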
int fio_idle_prof_parse_opt(const char *args)
{
	ipc.opt = IDLE_PROF_OPT_NONE; /* default */

	if (!args) {
		log_err("fio: empty idle-prof option string\n");
		return -1;
	}

#if defined(FIO_HAVE_CPU_AFFINITY) && defined(CONFIG_SCHED_IDLE)
	if (strcmp("calibrate", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_CALI;
		fio_idle_prof_init();
		fio_idle_prof_start();
		fio_idle_prof_stop();
		show_idle_prof_stats(FIO_OUTPUT_NORMAL, NULL);
		return 1;
	} else if (strcmp("system", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_SYSTEM;
		return 0;
	} else if (strcmp("percpu", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_PERCPU;
		return 0;
	} else {
		log_err("fio: incorrect idle-prof option: %s\n", args);
		return -1;
	}
#else
	log_err("fio: idle-prof not supported on this platform\n");
	return -1;
#endif
}

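/*
 * Print the idleness stats in normal or JSON output format, then free
 * the profiler's memory allocations.
 */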
void show_idle_prof_stats(int output, struct json_object *parent)
{
	int i, nr_cpus = ipc.nr_cpus;
	struct json_object *tmp;
	char s[MAX_CPU_STR_LEN];

	if (output == FIO_OUTPUT_NORMAL) {
		if (ipc.opt > IDLE_PROF_OPT_CALI)
			log_info("\nCPU idleness:\n");
		else if (ipc.opt == IDLE_PROF_OPT_CALI)
			log_info("CPU idleness:\n");

		if (ipc.opt >= IDLE_PROF_OPT_SYSTEM)
			log_info("  system: %3.2f%%\n", fio_idle_prof_cpu_stat(-1));

		if (ipc.opt == IDLE_PROF_OPT_PERCPU) {
			log_info("  percpu: %3.2f%%", fio_idle_prof_cpu_stat(0));
			for (i = 1; i < nr_cpus; i++)
				log_info(", %3.2f%%", fio_idle_prof_cpu_stat(i));
			log_info("\n");
		}

		if (ipc.opt >= IDLE_PROF_OPT_CALI) {
			log_info("  unit work: mean=%3.2fus,", ipc.cali_mean);
			log_info(" stddev=%3.2f\n", ipc.cali_stddev);
		}

		/* dynamic mem allocations can now be freed */
		if (ipc.opt != IDLE_PROF_OPT_NONE)
			fio_idle_prof_cleanup();

		return;
	}

	if ((ipc.opt != IDLE_PROF_OPT_NONE) && (output == FIO_OUTPUT_JSON)) {
		if (!parent)
			return;

		tmp = json_create_object();
		if (!tmp)
			return;

		json_object_add_value_object(parent, "cpu_idleness", tmp);
		json_object_add_value_float(tmp, "system", fio_idle_prof_cpu_stat(-1));

		if (ipc.opt == IDLE_PROF_OPT_PERCPU) {
			for (i = 0; i < nr_cpus; i++) {
				snprintf(s, MAX_CPU_STR_LEN, "cpu-%d", i);
				json_object_add_value_float(tmp, s, fio_idle_prof_cpu_stat(i));
			}
		}

		json_object_add_value_float(tmp, "unit_mean", ipc.cali_mean);
		json_object_add_value_float(tmp, "unit_stddev", ipc.cali_stddev);

		fio_idle_prof_cleanup();
	}
}
    495