1 #include <stdio.h> 2 #include <string.h> 3 #include <sys/time.h> 4 #include <sys/types.h> 5 #include <sys/stat.h> 6 #include <dirent.h> 7 #include <libgen.h> 8 #include <math.h> 9 #include <assert.h> 10 11 #include "fio.h" 12 #include "smalloc.h" 13 #include "diskutil.h" 14 15 static int last_majdev, last_mindev; 16 static struct disk_util *last_du; 17 18 static struct fio_mutex *disk_util_mutex; 19 20 FLIST_HEAD(disk_list); 21 22 static struct disk_util *__init_per_file_disk_util(struct thread_data *td, 23 int majdev, int mindev, char *path); 24 25 static void disk_util_free(struct disk_util *du) 26 { 27 if (du == last_du) 28 last_du = NULL; 29 30 while (!flist_empty(&du->slaves)) { 31 struct disk_util *slave; 32 33 slave = flist_first_entry(&du->slaves, struct disk_util, slavelist); 34 flist_del(&slave->slavelist); 35 slave->users--; 36 } 37 38 fio_mutex_remove(du->lock); 39 sfree(du); 40 } 41 42 static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus) 43 { 44 unsigned in_flight; 45 unsigned long long sectors[2]; 46 char line[256]; 47 FILE *f; 48 char *p; 49 int ret; 50 51 dprint(FD_DISKUTIL, "open stat file: %s\n", du->path); 52 53 f = fopen(du->path, "r"); 54 if (!f) 55 return 1; 56 57 p = fgets(line, sizeof(line), f); 58 if (!p) { 59 fclose(f); 60 return 1; 61 } 62 63 dprint(FD_DISKUTIL, "%s: %s", du->path, p); 64 65 ret = sscanf(p, "%llu %llu %llu %llu %llu %llu %llu %llu %u %llu %llu\n", 66 (unsigned long long *) &dus->s.ios[0], 67 (unsigned long long *) &dus->s.merges[0], 68 §ors[0], 69 (unsigned long long *) &dus->s.ticks[0], 70 (unsigned long long *) &dus->s.ios[1], 71 (unsigned long long *) &dus->s.merges[1], 72 §ors[1], 73 (unsigned long long *) &dus->s.ticks[1], 74 &in_flight, 75 (unsigned long long *) &dus->s.io_ticks, 76 (unsigned long long *) &dus->s.time_in_queue); 77 fclose(f); 78 dprint(FD_DISKUTIL, "%s: stat read ok? %d\n", du->path, ret == 1); 79 dus->s.sectors[0] = sectors[0]; 80 dus->s.sectors[1] = sectors[1]; 81 return ret != 11; 82 } 83 84 static void update_io_tick_disk(struct disk_util *du) 85 { 86 struct disk_util_stat __dus, *dus, *ldus; 87 struct timeval t; 88 89 if (!du->users) 90 return; 91 if (get_io_ticks(du, &__dus)) 92 return; 93 94 dus = &du->dus; 95 ldus = &du->last_dus; 96 97 dus->s.sectors[0] += (__dus.s.sectors[0] - ldus->s.sectors[0]); 98 dus->s.sectors[1] += (__dus.s.sectors[1] - ldus->s.sectors[1]); 99 dus->s.ios[0] += (__dus.s.ios[0] - ldus->s.ios[0]); 100 dus->s.ios[1] += (__dus.s.ios[1] - ldus->s.ios[1]); 101 dus->s.merges[0] += (__dus.s.merges[0] - ldus->s.merges[0]); 102 dus->s.merges[1] += (__dus.s.merges[1] - ldus->s.merges[1]); 103 dus->s.ticks[0] += (__dus.s.ticks[0] - ldus->s.ticks[0]); 104 dus->s.ticks[1] += (__dus.s.ticks[1] - ldus->s.ticks[1]); 105 dus->s.io_ticks += (__dus.s.io_ticks - ldus->s.io_ticks); 106 dus->s.time_in_queue += (__dus.s.time_in_queue - ldus->s.time_in_queue); 107 108 fio_gettime(&t, NULL); 109 dus->s.msec += mtime_since(&du->time, &t); 110 memcpy(&du->time, &t, sizeof(t)); 111 memcpy(&ldus->s, &__dus.s, sizeof(__dus.s)); 112 } 113 114 int update_io_ticks(void) 115 { 116 struct flist_head *entry; 117 struct disk_util *du; 118 int ret = 0; 119 120 dprint(FD_DISKUTIL, "update io ticks\n"); 121 122 fio_mutex_down(disk_util_mutex); 123 124 if (!helper_exit) { 125 flist_for_each(entry, &disk_list) { 126 du = flist_entry(entry, struct disk_util, list); 127 update_io_tick_disk(du); 128 } 129 } else 130 ret = 1; 131 132 fio_mutex_up(disk_util_mutex); 133 return ret; 134 } 135 136 static struct disk_util *disk_util_exists(int major, int minor) 137 { 138 struct flist_head *entry; 139 struct disk_util *du; 140 141 fio_mutex_down(disk_util_mutex); 142 143 flist_for_each(entry, &disk_list) { 144 du = flist_entry(entry, struct disk_util, list); 145 146 if (major == du->major && minor == du->minor) { 147 fio_mutex_up(disk_util_mutex); 148 return du; 149 } 150 } 151 152 fio_mutex_up(disk_util_mutex); 153 return NULL; 154 } 155 156 static int get_device_numbers(char *file_name, int *maj, int *min) 157 { 158 struct stat st; 159 int majdev, mindev; 160 char tempname[PATH_MAX], *p; 161 162 if (!lstat(file_name, &st)) { 163 if (S_ISBLK(st.st_mode)) { 164 majdev = major(st.st_rdev); 165 mindev = minor(st.st_rdev); 166 } else if (S_ISCHR(st.st_mode)) { 167 majdev = major(st.st_rdev); 168 mindev = minor(st.st_rdev); 169 if (fio_lookup_raw(st.st_rdev, &majdev, &mindev)) 170 return -1; 171 } else if (S_ISFIFO(st.st_mode)) 172 return -1; 173 else { 174 majdev = major(st.st_dev); 175 mindev = minor(st.st_dev); 176 } 177 } else { 178 /* 179 * must be a file, open "." in that path 180 */ 181 strncpy(tempname, file_name, PATH_MAX - 1); 182 p = dirname(tempname); 183 if (stat(p, &st)) { 184 perror("disk util stat"); 185 return -1; 186 } 187 188 majdev = major(st.st_dev); 189 mindev = minor(st.st_dev); 190 } 191 192 *min = mindev; 193 *maj = majdev; 194 195 return 0; 196 } 197 198 static int read_block_dev_entry(char *path, int *maj, int *min) 199 { 200 char line[256], *p; 201 FILE *f; 202 203 f = fopen(path, "r"); 204 if (!f) { 205 perror("open path"); 206 return 1; 207 } 208 209 p = fgets(line, sizeof(line), f); 210 fclose(f); 211 212 if (!p) 213 return 1; 214 215 if (sscanf(p, "%u:%u", maj, min) != 2) 216 return 1; 217 218 return 0; 219 } 220 221 static void find_add_disk_slaves(struct thread_data *td, char *path, 222 struct disk_util *masterdu) 223 { 224 DIR *dirhandle = NULL; 225 struct dirent *dirent = NULL; 226 char slavesdir[PATH_MAX], temppath[PATH_MAX], slavepath[PATH_MAX]; 227 struct disk_util *slavedu = NULL; 228 int majdev, mindev; 229 ssize_t linklen; 230 231 sprintf(slavesdir, "%s/%s", path, "slaves"); 232 dirhandle = opendir(slavesdir); 233 if (!dirhandle) 234 return; 235 236 while ((dirent = readdir(dirhandle)) != NULL) { 237 if (!strcmp(dirent->d_name, ".") || 238 !strcmp(dirent->d_name, "..")) 239 continue; 240 241 sprintf(temppath, "%s%s%s", slavesdir, FIO_OS_PATH_SEPARATOR, dirent->d_name); 242 /* Can we always assume that the slaves device entries 243 * are links to the real directories for the slave 244 * devices? 245 */ 246 linklen = readlink(temppath, slavepath, PATH_MAX - 1); 247 if (linklen < 0) { 248 perror("readlink() for slave device."); 249 closedir(dirhandle); 250 return; 251 } 252 slavepath[linklen] = '\0'; 253 254 sprintf(temppath, "%s/%s/dev", slavesdir, slavepath); 255 if (read_block_dev_entry(temppath, &majdev, &mindev)) { 256 perror("Error getting slave device numbers."); 257 closedir(dirhandle); 258 return; 259 } 260 261 /* 262 * See if this maj,min already exists 263 */ 264 slavedu = disk_util_exists(majdev, mindev); 265 if (slavedu) 266 continue; 267 268 sprintf(temppath, "%s%s%s", slavesdir, FIO_OS_PATH_SEPARATOR, slavepath); 269 __init_per_file_disk_util(td, majdev, mindev, temppath); 270 slavedu = disk_util_exists(majdev, mindev); 271 272 /* Should probably use an assert here. slavedu should 273 * always be present at this point. */ 274 if (slavedu) { 275 slavedu->users++; 276 flist_add_tail(&slavedu->slavelist, &masterdu->slaves); 277 } 278 } 279 280 closedir(dirhandle); 281 } 282 283 static struct disk_util *disk_util_add(struct thread_data *td, int majdev, 284 int mindev, char *path) 285 { 286 struct disk_util *du, *__du; 287 struct flist_head *entry; 288 int l; 289 290 dprint(FD_DISKUTIL, "add maj/min %d/%d: %s\n", majdev, mindev, path); 291 292 du = smalloc(sizeof(*du)); 293 if (!du) { 294 log_err("fio: smalloc() pool exhausted\n"); 295 return NULL; 296 } 297 298 memset(du, 0, sizeof(*du)); 299 INIT_FLIST_HEAD(&du->list); 300 l = snprintf(du->path, sizeof(du->path), "%s/stat", path); 301 if (l < 0 || l >= sizeof(du->path)) { 302 log_err("constructed path \"%.100s[...]/stat\" larger than buffer (%zu bytes)\n", 303 path, sizeof(du->path) - 1); 304 sfree(du); 305 return NULL; 306 } 307 strncpy((char *) du->dus.name, basename(path), FIO_DU_NAME_SZ - 1); 308 du->sysfs_root = path; 309 du->major = majdev; 310 du->minor = mindev; 311 INIT_FLIST_HEAD(&du->slavelist); 312 INIT_FLIST_HEAD(&du->slaves); 313 du->lock = fio_mutex_init(FIO_MUTEX_UNLOCKED); 314 du->users = 0; 315 316 fio_mutex_down(disk_util_mutex); 317 318 flist_for_each(entry, &disk_list) { 319 __du = flist_entry(entry, struct disk_util, list); 320 321 dprint(FD_DISKUTIL, "found %s in list\n", __du->dus.name); 322 323 if (!strcmp((char *) du->dus.name, (char *) __du->dus.name)) { 324 disk_util_free(du); 325 fio_mutex_up(disk_util_mutex); 326 return __du; 327 } 328 } 329 330 dprint(FD_DISKUTIL, "add %s to list\n", du->dus.name); 331 332 fio_gettime(&du->time, NULL); 333 get_io_ticks(du, &du->last_dus); 334 335 flist_add_tail(&du->list, &disk_list); 336 fio_mutex_up(disk_util_mutex); 337 338 find_add_disk_slaves(td, path, du); 339 return du; 340 } 341 342 static int check_dev_match(int majdev, int mindev, char *path) 343 { 344 int major, minor; 345 346 if (read_block_dev_entry(path, &major, &minor)) 347 return 1; 348 349 if (majdev == major && mindev == minor) 350 return 0; 351 352 return 1; 353 } 354 355 static int find_block_dir(int majdev, int mindev, char *path, int link_ok) 356 { 357 struct dirent *dir; 358 struct stat st; 359 int found = 0; 360 DIR *D; 361 362 D = opendir(path); 363 if (!D) 364 return 0; 365 366 while ((dir = readdir(D)) != NULL) { 367 char full_path[256]; 368 369 if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) 370 continue; 371 372 sprintf(full_path, "%s%s%s", path, FIO_OS_PATH_SEPARATOR, dir->d_name); 373 374 if (!strcmp(dir->d_name, "dev")) { 375 if (!check_dev_match(majdev, mindev, full_path)) { 376 found = 1; 377 break; 378 } 379 } 380 381 if (link_ok) { 382 if (stat(full_path, &st) == -1) { 383 perror("stat"); 384 break; 385 } 386 } else { 387 if (lstat(full_path, &st) == -1) { 388 perror("stat"); 389 break; 390 } 391 } 392 393 if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) 394 continue; 395 396 found = find_block_dir(majdev, mindev, full_path, 0); 397 if (found) { 398 strcpy(path, full_path); 399 break; 400 } 401 } 402 403 closedir(D); 404 return found; 405 } 406 407 static struct disk_util *__init_per_file_disk_util(struct thread_data *td, 408 int majdev, int mindev, 409 char *path) 410 { 411 struct stat st; 412 char tmp[PATH_MAX]; 413 char *p; 414 415 /* 416 * If there's a ../queue/ directory there, we are inside a partition. 417 * Check if that is the case and jump back. For loop/md/dm etc we 418 * are already in the right spot. 419 */ 420 sprintf(tmp, "%s/../queue", path); 421 if (!stat(tmp, &st)) { 422 p = dirname(path); 423 sprintf(tmp, "%s/queue", p); 424 if (stat(tmp, &st)) { 425 log_err("unknown sysfs layout\n"); 426 return NULL; 427 } 428 strncpy(tmp, p, PATH_MAX - 1); 429 sprintf(path, "%s", tmp); 430 } 431 432 if (td->o.ioscheduler && !td->sysfs_root) 433 td->sysfs_root = strdup(path); 434 435 return disk_util_add(td, majdev, mindev, path); 436 } 437 438 static struct disk_util *init_per_file_disk_util(struct thread_data *td, 439 char *filename) 440 { 441 442 char foo[PATH_MAX]; 443 struct disk_util *du; 444 int mindev, majdev; 445 446 if (get_device_numbers(filename, &majdev, &mindev)) 447 return NULL; 448 449 dprint(FD_DISKUTIL, "%s belongs to maj/min %d/%d\n", filename, majdev, 450 mindev); 451 452 du = disk_util_exists(majdev, mindev); 453 if (du) { 454 if (td->o.ioscheduler && !td->sysfs_root) 455 td->sysfs_root = strdup(du->sysfs_root); 456 457 return du; 458 } 459 460 /* 461 * for an fs without a device, we will repeatedly stat through 462 * sysfs which can take oodles of time for thousands of files. so 463 * cache the last lookup and compare with that before going through 464 * everything again. 465 */ 466 if (mindev == last_mindev && majdev == last_majdev) 467 return last_du; 468 469 last_mindev = mindev; 470 last_majdev = majdev; 471 472 sprintf(foo, "/sys/block"); 473 if (!find_block_dir(majdev, mindev, foo, 1)) 474 return NULL; 475 476 return __init_per_file_disk_util(td, majdev, mindev, foo); 477 } 478 479 static struct disk_util *__init_disk_util(struct thread_data *td, 480 struct fio_file *f) 481 { 482 return init_per_file_disk_util(td, f->file_name); 483 } 484 485 void init_disk_util(struct thread_data *td) 486 { 487 struct fio_file *f; 488 unsigned int i; 489 490 if (!td->o.do_disk_util || 491 (td->io_ops->flags & (FIO_DISKLESSIO | FIO_NODISKUTIL))) 492 return; 493 494 for_each_file(td, f, i) 495 f->du = __init_disk_util(td, f); 496 } 497 498 static void show_agg_stats(struct disk_util_agg *agg, int terse) 499 { 500 if (!agg->slavecount) 501 return; 502 503 if (!terse) { 504 log_info(", aggrios=%llu/%llu, aggrmerge=%llu/%llu, " 505 "aggrticks=%llu/%llu, aggrin_queue=%llu, " 506 "aggrutil=%3.2f%%", 507 (unsigned long long) agg->ios[0] / agg->slavecount, 508 (unsigned long long) agg->ios[1] / agg->slavecount, 509 (unsigned long long) agg->merges[0] / agg->slavecount, 510 (unsigned long long) agg->merges[1] / agg->slavecount, 511 (unsigned long long) agg->ticks[0] / agg->slavecount, 512 (unsigned long long) agg->ticks[1] / agg->slavecount, 513 (unsigned long long) agg->time_in_queue / agg->slavecount, 514 agg->max_util.u.f); 515 } else { 516 log_info(";slaves;%llu;%llu;%llu;%llu;%llu;%llu;%llu;%3.2f%%", 517 (unsigned long long) agg->ios[0] / agg->slavecount, 518 (unsigned long long) agg->ios[1] / agg->slavecount, 519 (unsigned long long) agg->merges[0] / agg->slavecount, 520 (unsigned long long) agg->merges[1] / agg->slavecount, 521 (unsigned long long) agg->ticks[0] / agg->slavecount, 522 (unsigned long long) agg->ticks[1] / agg->slavecount, 523 (unsigned long long) agg->time_in_queue / agg->slavecount, 524 agg->max_util.u.f); 525 } 526 } 527 528 static void aggregate_slaves_stats(struct disk_util *masterdu) 529 { 530 struct disk_util_agg *agg = &masterdu->agg; 531 struct disk_util_stat *dus; 532 struct flist_head *entry; 533 struct disk_util *slavedu; 534 double util; 535 536 flist_for_each(entry, &masterdu->slaves) { 537 slavedu = flist_entry(entry, struct disk_util, slavelist); 538 dus = &slavedu->dus; 539 agg->ios[0] += dus->s.ios[0]; 540 agg->ios[1] += dus->s.ios[1]; 541 agg->merges[0] += dus->s.merges[0]; 542 agg->merges[1] += dus->s.merges[1]; 543 agg->sectors[0] += dus->s.sectors[0]; 544 agg->sectors[1] += dus->s.sectors[1]; 545 agg->ticks[0] += dus->s.ticks[0]; 546 agg->ticks[1] += dus->s.ticks[1]; 547 agg->time_in_queue += dus->s.time_in_queue; 548 agg->slavecount++; 549 550 util = (double) (100 * dus->s.io_ticks / (double) slavedu->dus.s.msec); 551 /* System utilization is the utilization of the 552 * component with the highest utilization. 553 */ 554 if (util > agg->max_util.u.f) 555 agg->max_util.u.f = util; 556 557 } 558 559 if (agg->max_util.u.f > 100.0) 560 agg->max_util.u.f = 100.0; 561 } 562 563 void disk_util_prune_entries(void) 564 { 565 fio_mutex_down(disk_util_mutex); 566 567 while (!flist_empty(&disk_list)) { 568 struct disk_util *du; 569 570 du = flist_first_entry(&disk_list, struct disk_util, list); 571 flist_del(&du->list); 572 disk_util_free(du); 573 } 574 575 last_majdev = last_mindev = -1; 576 fio_mutex_up(disk_util_mutex); 577 fio_mutex_remove(disk_util_mutex); 578 } 579 580 void print_disk_util(struct disk_util_stat *dus, struct disk_util_agg *agg, 581 int terse) 582 { 583 double util = 0; 584 585 if (dus->s.msec) 586 util = (double) 100 * dus->s.io_ticks / (double) dus->s.msec; 587 if (util > 100.0) 588 util = 100.0; 589 590 if (!terse) { 591 if (agg->slavecount) 592 log_info(" "); 593 594 log_info(" %s: ios=%llu/%llu, merge=%llu/%llu, " 595 "ticks=%llu/%llu, in_queue=%llu, util=%3.2f%%", 596 dus->name, 597 (unsigned long long) dus->s.ios[0], 598 (unsigned long long) dus->s.ios[1], 599 (unsigned long long) dus->s.merges[0], 600 (unsigned long long) dus->s.merges[1], 601 (unsigned long long) dus->s.ticks[0], 602 (unsigned long long) dus->s.ticks[1], 603 (unsigned long long) dus->s.time_in_queue, 604 util); 605 } else { 606 log_info(";%s;%llu;%llu;%llu;%llu;%llu;%llu;%llu;%3.2f%%", 607 dus->name, 608 (unsigned long long) dus->s.ios[0], 609 (unsigned long long) dus->s.ios[1], 610 (unsigned long long) dus->s.merges[0], 611 (unsigned long long) dus->s.merges[1], 612 (unsigned long long) dus->s.ticks[0], 613 (unsigned long long) dus->s.ticks[1], 614 (unsigned long long) dus->s.time_in_queue, 615 util); 616 } 617 618 /* 619 * If the device has slaves, aggregate the stats for 620 * those slave devices also. 621 */ 622 show_agg_stats(agg, terse); 623 624 if (!terse) 625 log_info("\n"); 626 } 627 628 void json_array_add_disk_util(struct disk_util_stat *dus, 629 struct disk_util_agg *agg, struct json_array *array) 630 { 631 struct json_object *obj; 632 double util = 0; 633 634 if (dus->s.msec) 635 util = (double) 100 * dus->s.io_ticks / (double) dus->s.msec; 636 if (util > 100.0) 637 util = 100.0; 638 639 obj = json_create_object(); 640 json_array_add_value_object(array, obj); 641 642 json_object_add_value_string(obj, "name", dus->name); 643 json_object_add_value_int(obj, "read_ios", dus->s.ios[0]); 644 json_object_add_value_int(obj, "write_ios", dus->s.ios[1]); 645 json_object_add_value_int(obj, "read_merges", dus->s.merges[0]); 646 json_object_add_value_int(obj, "write_merges", dus->s.merges[1]); 647 json_object_add_value_int(obj, "read_ticks", dus->s.ticks[0]); 648 json_object_add_value_int(obj, "write_ticks", dus->s.ticks[1]); 649 json_object_add_value_int(obj, "in_queue", dus->s.time_in_queue); 650 json_object_add_value_float(obj, "util", util); 651 652 /* 653 * If the device has slaves, aggregate the stats for 654 * those slave devices also. 655 */ 656 if (!agg->slavecount) 657 return; 658 json_object_add_value_int(obj, "aggr_read_ios", 659 agg->ios[0] / agg->slavecount); 660 json_object_add_value_int(obj, "aggr_write_ios", 661 agg->ios[1] / agg->slavecount); 662 json_object_add_value_int(obj, "aggr_read_merges", 663 agg->merges[0] / agg->slavecount); 664 json_object_add_value_int(obj, "aggr_write_merge", 665 agg->merges[1] / agg->slavecount); 666 json_object_add_value_int(obj, "aggr_read_ticks", 667 agg->ticks[0] / agg->slavecount); 668 json_object_add_value_int(obj, "aggr_write_ticks", 669 agg->ticks[1] / agg->slavecount); 670 json_object_add_value_int(obj, "aggr_in_queue", 671 agg->time_in_queue / agg->slavecount); 672 json_object_add_value_float(obj, "aggr_util", agg->max_util.u.f); 673 } 674 675 static void json_object_add_disk_utils(struct json_object *obj, 676 struct flist_head *head) 677 { 678 struct json_array *array = json_create_array(); 679 struct flist_head *entry; 680 struct disk_util *du; 681 682 json_object_add_value_array(obj, "disk_util", array); 683 684 flist_for_each(entry, head) { 685 du = flist_entry(entry, struct disk_util, list); 686 687 aggregate_slaves_stats(du); 688 json_array_add_disk_util(&du->dus, &du->agg, array); 689 } 690 } 691 692 void show_disk_util(int terse, struct json_object *parent) 693 { 694 struct flist_head *entry; 695 struct disk_util *du; 696 697 if (!disk_util_mutex) 698 return; 699 700 fio_mutex_down(disk_util_mutex); 701 702 if (flist_empty(&disk_list)) { 703 fio_mutex_up(disk_util_mutex); 704 return; 705 } 706 707 if (output_format == FIO_OUTPUT_JSON) 708 assert(parent); 709 710 if (!terse && output_format != FIO_OUTPUT_JSON) 711 log_info("\nDisk stats (read/write):\n"); 712 713 if (output_format == FIO_OUTPUT_JSON) 714 json_object_add_disk_utils(parent, &disk_list); 715 else 716 flist_for_each(entry, &disk_list) { 717 du = flist_entry(entry, struct disk_util, list); 718 719 aggregate_slaves_stats(du); 720 print_disk_util(&du->dus, &du->agg, terse); 721 } 722 723 fio_mutex_up(disk_util_mutex); 724 } 725 726 void setup_disk_util(void) 727 { 728 disk_util_mutex = fio_mutex_init(FIO_MUTEX_UNLOCKED); 729 } 730