1 #include <stdio.h> 2 #include <string.h> 3 #include <sys/time.h> 4 #include <sys/types.h> 5 #include <sys/stat.h> 6 #include <dirent.h> 7 #include <libgen.h> 8 #include <math.h> 9 #include <assert.h> 10 11 #include "fio.h" 12 #include "smalloc.h" 13 #include "diskutil.h" 14 15 static int last_majdev, last_mindev; 16 static struct disk_util *last_du; 17 18 static struct fio_mutex *disk_util_mutex; 19 20 FLIST_HEAD(disk_list); 21 22 static struct disk_util *__init_per_file_disk_util(struct thread_data *td, 23 int majdev, int mindev, char *path); 24 25 static void disk_util_free(struct disk_util *du) 26 { 27 if (du == last_du) 28 last_du = NULL; 29 30 while (!flist_empty(&du->slaves)) { 31 struct disk_util *slave; 32 33 slave = flist_entry(du->slaves.next, struct disk_util, slavelist); 34 flist_del(&slave->slavelist); 35 slave->users--; 36 } 37 38 fio_mutex_remove(du->lock); 39 sfree(du); 40 } 41 42 static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus) 43 { 44 unsigned in_flight; 45 unsigned long long sectors[2]; 46 char line[256]; 47 FILE *f; 48 char *p; 49 int ret; 50 51 dprint(FD_DISKUTIL, "open stat file: %s\n", du->path); 52 53 f = fopen(du->path, "r"); 54 if (!f) 55 return 1; 56 57 p = fgets(line, sizeof(line), f); 58 if (!p) { 59 fclose(f); 60 return 1; 61 } 62 63 dprint(FD_DISKUTIL, "%s: %s", du->path, p); 64 65 ret = sscanf(p, "%u %u %llu %u %u %u %llu %u %u %u %u\n", 66 &dus->s.ios[0], 67 &dus->s.merges[0], §ors[0], 68 &dus->s.ticks[0], &dus->s.ios[1], 69 &dus->s.merges[1], §ors[1], 70 &dus->s.ticks[1], &in_flight, 71 &dus->s.io_ticks, 72 &dus->s.time_in_queue); 73 fclose(f); 74 dprint(FD_DISKUTIL, "%s: stat read ok? %d\n", du->path, ret == 1); 75 dus->s.sectors[0] = sectors[0]; 76 dus->s.sectors[1] = sectors[1]; 77 return ret != 11; 78 } 79 80 static void update_io_tick_disk(struct disk_util *du) 81 { 82 struct disk_util_stat __dus, *dus, *ldus; 83 struct timeval t; 84 85 if (!du->users) 86 return; 87 if (get_io_ticks(du, &__dus)) 88 return; 89 90 dus = &du->dus; 91 ldus = &du->last_dus; 92 93 dus->s.sectors[0] += (__dus.s.sectors[0] - ldus->s.sectors[0]); 94 dus->s.sectors[1] += (__dus.s.sectors[1] - ldus->s.sectors[1]); 95 dus->s.ios[0] += (__dus.s.ios[0] - ldus->s.ios[0]); 96 dus->s.ios[1] += (__dus.s.ios[1] - ldus->s.ios[1]); 97 dus->s.merges[0] += (__dus.s.merges[0] - ldus->s.merges[0]); 98 dus->s.merges[1] += (__dus.s.merges[1] - ldus->s.merges[1]); 99 dus->s.ticks[0] += (__dus.s.ticks[0] - ldus->s.ticks[0]); 100 dus->s.ticks[1] += (__dus.s.ticks[1] - ldus->s.ticks[1]); 101 dus->s.io_ticks += (__dus.s.io_ticks - ldus->s.io_ticks); 102 dus->s.time_in_queue += (__dus.s.time_in_queue - ldus->s.time_in_queue); 103 104 fio_gettime(&t, NULL); 105 dus->s.msec += mtime_since(&du->time, &t); 106 memcpy(&du->time, &t, sizeof(t)); 107 memcpy(&ldus->s, &__dus.s, sizeof(__dus.s)); 108 } 109 110 int update_io_ticks(void) 111 { 112 struct flist_head *entry; 113 struct disk_util *du; 114 int ret = 0; 115 116 dprint(FD_DISKUTIL, "update io ticks\n"); 117 118 fio_mutex_down(disk_util_mutex); 119 120 if (!disk_util_exit) { 121 flist_for_each(entry, &disk_list) { 122 du = flist_entry(entry, struct disk_util, list); 123 update_io_tick_disk(du); 124 } 125 } else 126 ret = 1; 127 128 fio_mutex_up(disk_util_mutex); 129 return ret; 130 } 131 132 static struct disk_util *disk_util_exists(int major, int minor) 133 { 134 struct flist_head *entry; 135 struct disk_util *du; 136 137 fio_mutex_down(disk_util_mutex); 138 139 flist_for_each(entry, &disk_list) { 140 du = flist_entry(entry, struct disk_util, list); 141 142 if (major == du->major && minor == du->minor) { 143 fio_mutex_up(disk_util_mutex); 144 return du; 145 } 146 } 147 148 fio_mutex_up(disk_util_mutex); 149 return NULL; 150 } 151 152 static int get_device_numbers(char *file_name, int *maj, int *min) 153 { 154 struct stat st; 155 int majdev, mindev; 156 char tempname[PATH_MAX], *p; 157 158 if (!lstat(file_name, &st)) { 159 if (S_ISBLK(st.st_mode)) { 160 majdev = major(st.st_rdev); 161 mindev = minor(st.st_rdev); 162 } else if (S_ISCHR(st.st_mode)) { 163 majdev = major(st.st_rdev); 164 mindev = minor(st.st_rdev); 165 if (fio_lookup_raw(st.st_rdev, &majdev, &mindev)) 166 return -1; 167 } else if (S_ISFIFO(st.st_mode)) 168 return -1; 169 else { 170 majdev = major(st.st_dev); 171 mindev = minor(st.st_dev); 172 } 173 } else { 174 /* 175 * must be a file, open "." in that path 176 */ 177 strncpy(tempname, file_name, PATH_MAX - 1); 178 p = dirname(tempname); 179 if (stat(p, &st)) { 180 perror("disk util stat"); 181 return -1; 182 } 183 184 majdev = major(st.st_dev); 185 mindev = minor(st.st_dev); 186 } 187 188 *min = mindev; 189 *maj = majdev; 190 191 return 0; 192 } 193 194 static int read_block_dev_entry(char *path, int *maj, int *min) 195 { 196 char line[256], *p; 197 FILE *f; 198 199 f = fopen(path, "r"); 200 if (!f) { 201 perror("open path"); 202 return 1; 203 } 204 205 p = fgets(line, sizeof(line), f); 206 fclose(f); 207 208 if (!p) 209 return 1; 210 211 if (sscanf(p, "%u:%u", maj, min) != 2) 212 return 1; 213 214 return 0; 215 } 216 217 static void find_add_disk_slaves(struct thread_data *td, char *path, 218 struct disk_util *masterdu) 219 { 220 DIR *dirhandle = NULL; 221 struct dirent *dirent = NULL; 222 char slavesdir[PATH_MAX], temppath[PATH_MAX], slavepath[PATH_MAX]; 223 struct disk_util *slavedu = NULL; 224 int majdev, mindev; 225 ssize_t linklen; 226 227 sprintf(slavesdir, "%s/%s", path, "slaves"); 228 dirhandle = opendir(slavesdir); 229 if (!dirhandle) 230 return; 231 232 while ((dirent = readdir(dirhandle)) != NULL) { 233 if (!strcmp(dirent->d_name, ".") || 234 !strcmp(dirent->d_name, "..")) 235 continue; 236 237 sprintf(temppath, "%s%s%s", slavesdir, FIO_OS_PATH_SEPARATOR, dirent->d_name); 238 /* Can we always assume that the slaves device entries 239 * are links to the real directories for the slave 240 * devices? 241 */ 242 linklen = readlink(temppath, slavepath, PATH_MAX - 1); 243 if (linklen < 0) { 244 perror("readlink() for slave device."); 245 closedir(dirhandle); 246 return; 247 } 248 slavepath[linklen] = '\0'; 249 250 sprintf(temppath, "%s/%s/dev", slavesdir, slavepath); 251 if (read_block_dev_entry(temppath, &majdev, &mindev)) { 252 perror("Error getting slave device numbers."); 253 closedir(dirhandle); 254 return; 255 } 256 257 /* 258 * See if this maj,min already exists 259 */ 260 slavedu = disk_util_exists(majdev, mindev); 261 if (slavedu) 262 continue; 263 264 sprintf(temppath, "%s%s%s", slavesdir, FIO_OS_PATH_SEPARATOR, slavepath); 265 __init_per_file_disk_util(td, majdev, mindev, temppath); 266 slavedu = disk_util_exists(majdev, mindev); 267 268 /* Should probably use an assert here. slavedu should 269 * always be present at this point. */ 270 if (slavedu) { 271 slavedu->users++; 272 flist_add_tail(&slavedu->slavelist, &masterdu->slaves); 273 } 274 } 275 276 closedir(dirhandle); 277 } 278 279 static struct disk_util *disk_util_add(struct thread_data *td, int majdev, 280 int mindev, char *path) 281 { 282 struct disk_util *du, *__du; 283 struct flist_head *entry; 284 int l; 285 286 dprint(FD_DISKUTIL, "add maj/min %d/%d: %s\n", majdev, mindev, path); 287 288 du = smalloc(sizeof(*du)); 289 if (!du) { 290 log_err("fio: smalloc() pool exhausted\n"); 291 return NULL; 292 } 293 294 memset(du, 0, sizeof(*du)); 295 INIT_FLIST_HEAD(&du->list); 296 l = snprintf(du->path, sizeof(du->path), "%s/stat", path); 297 if (l < 0 || l >= sizeof(du->path)) { 298 log_err("constructed path \"%.100s[...]/stat\" larger than buffer (%zu bytes)\n", 299 path, sizeof(du->path) - 1); 300 sfree(du); 301 return NULL; 302 } 303 strncpy((char *) du->dus.name, basename(path), FIO_DU_NAME_SZ - 1); 304 du->sysfs_root = path; 305 du->major = majdev; 306 du->minor = mindev; 307 INIT_FLIST_HEAD(&du->slavelist); 308 INIT_FLIST_HEAD(&du->slaves); 309 du->lock = fio_mutex_init(FIO_MUTEX_UNLOCKED); 310 du->users = 0; 311 312 fio_mutex_down(disk_util_mutex); 313 314 flist_for_each(entry, &disk_list) { 315 __du = flist_entry(entry, struct disk_util, list); 316 317 dprint(FD_DISKUTIL, "found %s in list\n", __du->dus.name); 318 319 if (!strcmp((char *) du->dus.name, (char *) __du->dus.name)) { 320 disk_util_free(du); 321 fio_mutex_up(disk_util_mutex); 322 return __du; 323 } 324 } 325 326 dprint(FD_DISKUTIL, "add %s to list\n", du->dus.name); 327 328 fio_gettime(&du->time, NULL); 329 get_io_ticks(du, &du->last_dus); 330 331 flist_add_tail(&du->list, &disk_list); 332 fio_mutex_up(disk_util_mutex); 333 334 find_add_disk_slaves(td, path, du); 335 return du; 336 } 337 338 static int check_dev_match(int majdev, int mindev, char *path) 339 { 340 int major, minor; 341 342 if (read_block_dev_entry(path, &major, &minor)) 343 return 1; 344 345 if (majdev == major && mindev == minor) 346 return 0; 347 348 return 1; 349 } 350 351 static int find_block_dir(int majdev, int mindev, char *path, int link_ok) 352 { 353 struct dirent *dir; 354 struct stat st; 355 int found = 0; 356 DIR *D; 357 358 D = opendir(path); 359 if (!D) 360 return 0; 361 362 while ((dir = readdir(D)) != NULL) { 363 char full_path[256]; 364 365 if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) 366 continue; 367 368 sprintf(full_path, "%s%s%s", path, FIO_OS_PATH_SEPARATOR, dir->d_name); 369 370 if (!strcmp(dir->d_name, "dev")) { 371 if (!check_dev_match(majdev, mindev, full_path)) { 372 found = 1; 373 break; 374 } 375 } 376 377 if (link_ok) { 378 if (stat(full_path, &st) == -1) { 379 perror("stat"); 380 break; 381 } 382 } else { 383 if (lstat(full_path, &st) == -1) { 384 perror("stat"); 385 break; 386 } 387 } 388 389 if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) 390 continue; 391 392 found = find_block_dir(majdev, mindev, full_path, 0); 393 if (found) { 394 strcpy(path, full_path); 395 break; 396 } 397 } 398 399 closedir(D); 400 return found; 401 } 402 403 static struct disk_util *__init_per_file_disk_util(struct thread_data *td, 404 int majdev, int mindev, 405 char *path) 406 { 407 struct stat st; 408 char tmp[PATH_MAX]; 409 char *p; 410 411 /* 412 * If there's a ../queue/ directory there, we are inside a partition. 413 * Check if that is the case and jump back. For loop/md/dm etc we 414 * are already in the right spot. 415 */ 416 sprintf(tmp, "%s/../queue", path); 417 if (!stat(tmp, &st)) { 418 p = dirname(path); 419 sprintf(tmp, "%s/queue", p); 420 if (stat(tmp, &st)) { 421 log_err("unknown sysfs layout\n"); 422 return NULL; 423 } 424 strncpy(tmp, p, PATH_MAX - 1); 425 sprintf(path, "%s", tmp); 426 } 427 428 if (td->o.ioscheduler && !td->sysfs_root) 429 td->sysfs_root = strdup(path); 430 431 return disk_util_add(td, majdev, mindev, path); 432 } 433 434 static struct disk_util *init_per_file_disk_util(struct thread_data *td, 435 char *filename) 436 { 437 438 char foo[PATH_MAX]; 439 struct disk_util *du; 440 int mindev, majdev; 441 442 if (get_device_numbers(filename, &majdev, &mindev)) 443 return NULL; 444 445 dprint(FD_DISKUTIL, "%s belongs to maj/min %d/%d\n", filename, majdev, 446 mindev); 447 448 du = disk_util_exists(majdev, mindev); 449 if (du) { 450 if (td->o.ioscheduler && !td->sysfs_root) 451 td->sysfs_root = strdup(du->sysfs_root); 452 453 return du; 454 } 455 456 /* 457 * for an fs without a device, we will repeatedly stat through 458 * sysfs which can take oodles of time for thousands of files. so 459 * cache the last lookup and compare with that before going through 460 * everything again. 461 */ 462 if (mindev == last_mindev && majdev == last_majdev) 463 return last_du; 464 465 last_mindev = mindev; 466 last_majdev = majdev; 467 468 sprintf(foo, "/sys/block"); 469 if (!find_block_dir(majdev, mindev, foo, 1)) 470 return NULL; 471 472 return __init_per_file_disk_util(td, majdev, mindev, foo); 473 } 474 475 static struct disk_util *__init_disk_util(struct thread_data *td, 476 struct fio_file *f) 477 { 478 return init_per_file_disk_util(td, f->file_name); 479 } 480 481 void init_disk_util(struct thread_data *td) 482 { 483 struct fio_file *f; 484 unsigned int i; 485 486 if (!td->o.do_disk_util || 487 (td->io_ops->flags & (FIO_DISKLESSIO | FIO_NODISKUTIL))) 488 return; 489 490 for_each_file(td, f, i) 491 f->du = __init_disk_util(td, f); 492 } 493 494 static void show_agg_stats(struct disk_util_agg *agg, int terse) 495 { 496 if (!agg->slavecount) 497 return; 498 499 if (!terse) { 500 log_info(", aggrios=%u/%u, aggrmerge=%u/%u, aggrticks=%u/%u," 501 " aggrin_queue=%u, aggrutil=%3.2f%%", 502 agg->ios[0] / agg->slavecount, 503 agg->ios[1] / agg->slavecount, 504 agg->merges[0] / agg->slavecount, 505 agg->merges[1] / agg->slavecount, 506 agg->ticks[0] / agg->slavecount, 507 agg->ticks[1] / agg->slavecount, 508 agg->time_in_queue / agg->slavecount, 509 agg->max_util.u.f); 510 } else { 511 log_info(";slaves;%u;%u;%u;%u;%u;%u;%u;%3.2f%%", 512 agg->ios[0] / agg->slavecount, 513 agg->ios[1] / agg->slavecount, 514 agg->merges[0] / agg->slavecount, 515 agg->merges[1] / agg->slavecount, 516 agg->ticks[0] / agg->slavecount, 517 agg->ticks[1] / agg->slavecount, 518 agg->time_in_queue / agg->slavecount, 519 agg->max_util.u.f); 520 } 521 } 522 523 static void aggregate_slaves_stats(struct disk_util *masterdu) 524 { 525 struct disk_util_agg *agg = &masterdu->agg; 526 struct disk_util_stat *dus; 527 struct flist_head *entry; 528 struct disk_util *slavedu; 529 double util; 530 531 flist_for_each(entry, &masterdu->slaves) { 532 slavedu = flist_entry(entry, struct disk_util, slavelist); 533 dus = &slavedu->dus; 534 agg->ios[0] += dus->s.ios[0]; 535 agg->ios[1] += dus->s.ios[1]; 536 agg->merges[0] += dus->s.merges[0]; 537 agg->merges[1] += dus->s.merges[1]; 538 agg->sectors[0] += dus->s.sectors[0]; 539 agg->sectors[1] += dus->s.sectors[1]; 540 agg->ticks[0] += dus->s.ticks[0]; 541 agg->ticks[1] += dus->s.ticks[1]; 542 agg->time_in_queue += dus->s.time_in_queue; 543 agg->slavecount++; 544 545 util = (double) (100 * dus->s.io_ticks / (double) slavedu->dus.s.msec); 546 /* System utilization is the utilization of the 547 * component with the highest utilization. 548 */ 549 if (util > agg->max_util.u.f) 550 agg->max_util.u.f = util; 551 552 } 553 554 if (agg->max_util.u.f > 100.0) 555 agg->max_util.u.f = 100.0; 556 } 557 558 void disk_util_prune_entries(void) 559 { 560 fio_mutex_down(disk_util_mutex); 561 562 while (!flist_empty(&disk_list)) { 563 struct disk_util *du; 564 565 du = flist_entry(disk_list.next, struct disk_util, list); 566 flist_del(&du->list); 567 disk_util_free(du); 568 } 569 570 last_majdev = last_mindev = -1; 571 fio_mutex_up(disk_util_mutex); 572 fio_mutex_remove(disk_util_mutex); 573 } 574 575 void print_disk_util(struct disk_util_stat *dus, struct disk_util_agg *agg, 576 int terse) 577 { 578 double util = 0; 579 580 if (dus->s.msec) 581 util = (double) 100 * dus->s.io_ticks / (double) dus->s.msec; 582 if (util > 100.0) 583 util = 100.0; 584 585 if (!terse) { 586 if (agg->slavecount) 587 log_info(" "); 588 589 log_info(" %s: ios=%u/%u, merge=%u/%u, ticks=%u/%u, " 590 "in_queue=%u, util=%3.2f%%", dus->name, 591 dus->s.ios[0], dus->s.ios[1], 592 dus->s.merges[0], dus->s.merges[1], 593 dus->s.ticks[0], dus->s.ticks[1], 594 dus->s.time_in_queue, util); 595 } else { 596 log_info(";%s;%u;%u;%u;%u;%u;%u;%u;%3.2f%%", 597 dus->name, dus->s.ios[0], 598 dus->s.ios[1], dus->s.merges[0], 599 dus->s.merges[1], dus->s.ticks[0], 600 dus->s.ticks[1], 601 dus->s.time_in_queue, util); 602 } 603 604 /* 605 * If the device has slaves, aggregate the stats for 606 * those slave devices also. 607 */ 608 show_agg_stats(agg, terse); 609 610 if (!terse) 611 log_info("\n"); 612 } 613 614 void json_array_add_disk_util(struct disk_util_stat *dus, 615 struct disk_util_agg *agg, struct json_array *array) 616 { 617 struct json_object *obj; 618 double util = 0; 619 620 if (dus->s.msec) 621 util = (double) 100 * dus->s.io_ticks / (double) dus->s.msec; 622 if (util > 100.0) 623 util = 100.0; 624 625 obj = json_create_object(); 626 json_array_add_value_object(array, obj); 627 628 json_object_add_value_string(obj, "name", dus->name); 629 json_object_add_value_int(obj, "read_ios", dus->s.ios[0]); 630 json_object_add_value_int(obj, "write_ios", dus->s.ios[1]); 631 json_object_add_value_int(obj, "read_merges", dus->s.merges[0]); 632 json_object_add_value_int(obj, "write_merges", dus->s.merges[1]); 633 json_object_add_value_int(obj, "read_ticks", dus->s.ticks[0]); 634 json_object_add_value_int(obj, "write_ticks", dus->s.ticks[1]); 635 json_object_add_value_int(obj, "in_queue", dus->s.time_in_queue); 636 json_object_add_value_float(obj, "util", util); 637 638 /* 639 * If the device has slaves, aggregate the stats for 640 * those slave devices also. 641 */ 642 if (!agg->slavecount) 643 return; 644 json_object_add_value_int(obj, "aggr_read_ios", 645 agg->ios[0] / agg->slavecount); 646 json_object_add_value_int(obj, "aggr_write_ios", 647 agg->ios[1] / agg->slavecount); 648 json_object_add_value_int(obj, "aggr_read_merges", 649 agg->merges[0] / agg->slavecount); 650 json_object_add_value_int(obj, "aggr_write_merge", 651 agg->merges[1] / agg->slavecount); 652 json_object_add_value_int(obj, "aggr_read_ticks", 653 agg->ticks[0] / agg->slavecount); 654 json_object_add_value_int(obj, "aggr_write_ticks", 655 agg->ticks[1] / agg->slavecount); 656 json_object_add_value_int(obj, "aggr_in_queue", 657 agg->time_in_queue / agg->slavecount); 658 json_object_add_value_float(obj, "aggr_util", agg->max_util.u.f); 659 } 660 661 static void json_object_add_disk_utils(struct json_object *obj, 662 struct flist_head *head) 663 { 664 struct json_array *array = json_create_array(); 665 struct flist_head *entry; 666 struct disk_util *du; 667 668 json_object_add_value_array(obj, "disk_util", array); 669 670 flist_for_each(entry, head) { 671 du = flist_entry(entry, struct disk_util, list); 672 673 aggregate_slaves_stats(du); 674 json_array_add_disk_util(&du->dus, &du->agg, array); 675 } 676 } 677 678 void show_disk_util(int terse, struct json_object *parent) 679 { 680 struct flist_head *entry; 681 struct disk_util *du; 682 683 fio_mutex_down(disk_util_mutex); 684 685 if (flist_empty(&disk_list)) { 686 fio_mutex_up(disk_util_mutex); 687 return; 688 } 689 690 if (output_format == FIO_OUTPUT_JSON) 691 assert(parent); 692 693 if (!terse && output_format != FIO_OUTPUT_JSON) 694 log_info("\nDisk stats (read/write):\n"); 695 696 if (output_format == FIO_OUTPUT_JSON) 697 json_object_add_disk_utils(parent, &disk_list); 698 else 699 flist_for_each(entry, &disk_list) { 700 du = flist_entry(entry, struct disk_util, list); 701 702 aggregate_slaves_stats(du); 703 print_disk_util(&du->dus, &du->agg, terse); 704 } 705 706 fio_mutex_up(disk_util_mutex); 707 } 708 709 void setup_disk_util(void) 710 { 711 disk_util_mutex = fio_mutex_init(FIO_MUTEX_UNLOCKED); 712 } 713