1 #include <stdio.h> 2 #include <string.h> 3 #include <sys/time.h> 4 #include <sys/types.h> 5 #include <sys/stat.h> 6 #include <dirent.h> 7 #include <libgen.h> 8 #include <math.h> 9 #include <assert.h> 10 11 #include "fio.h" 12 #include "smalloc.h" 13 #include "diskutil.h" 14 #include "helper_thread.h" 15 16 static int last_majdev, last_mindev; 17 static struct disk_util *last_du; 18 19 static struct fio_mutex *disk_util_mutex; 20 21 static struct disk_util *__init_per_file_disk_util(struct thread_data *td, 22 int majdev, int mindev, char *path); 23 24 static void disk_util_free(struct disk_util *du) 25 { 26 if (du == last_du) 27 last_du = NULL; 28 29 while (!flist_empty(&du->slaves)) { 30 struct disk_util *slave; 31 32 slave = flist_first_entry(&du->slaves, struct disk_util, slavelist); 33 flist_del(&slave->slavelist); 34 slave->users--; 35 } 36 37 fio_mutex_remove(du->lock); 38 free(du->sysfs_root); 39 sfree(du); 40 } 41 42 static int get_io_ticks(struct disk_util *du, struct disk_util_stat *dus) 43 { 44 unsigned in_flight; 45 unsigned long long sectors[2]; 46 char line[256]; 47 FILE *f; 48 char *p; 49 int ret; 50 51 dprint(FD_DISKUTIL, "open stat file: %s\n", du->path); 52 53 f = fopen(du->path, "r"); 54 if (!f) 55 return 1; 56 57 p = fgets(line, sizeof(line), f); 58 if (!p) { 59 fclose(f); 60 return 1; 61 } 62 63 dprint(FD_DISKUTIL, "%s: %s", du->path, p); 64 65 ret = sscanf(p, "%llu %llu %llu %llu %llu %llu %llu %llu %u %llu %llu\n", 66 (unsigned long long *) &dus->s.ios[0], 67 (unsigned long long *) &dus->s.merges[0], 68 §ors[0], 69 (unsigned long long *) &dus->s.ticks[0], 70 (unsigned long long *) &dus->s.ios[1], 71 (unsigned long long *) &dus->s.merges[1], 72 §ors[1], 73 (unsigned long long *) &dus->s.ticks[1], 74 &in_flight, 75 (unsigned long long *) &dus->s.io_ticks, 76 (unsigned long long *) &dus->s.time_in_queue); 77 fclose(f); 78 dprint(FD_DISKUTIL, "%s: stat read ok? %d\n", du->path, ret == 1); 79 dus->s.sectors[0] = sectors[0]; 80 dus->s.sectors[1] = sectors[1]; 81 return ret != 11; 82 } 83 84 static void update_io_tick_disk(struct disk_util *du) 85 { 86 struct disk_util_stat __dus, *dus, *ldus; 87 struct timeval t; 88 89 if (!du->users) 90 return; 91 if (get_io_ticks(du, &__dus)) 92 return; 93 94 dus = &du->dus; 95 ldus = &du->last_dus; 96 97 dus->s.sectors[0] += (__dus.s.sectors[0] - ldus->s.sectors[0]); 98 dus->s.sectors[1] += (__dus.s.sectors[1] - ldus->s.sectors[1]); 99 dus->s.ios[0] += (__dus.s.ios[0] - ldus->s.ios[0]); 100 dus->s.ios[1] += (__dus.s.ios[1] - ldus->s.ios[1]); 101 dus->s.merges[0] += (__dus.s.merges[0] - ldus->s.merges[0]); 102 dus->s.merges[1] += (__dus.s.merges[1] - ldus->s.merges[1]); 103 dus->s.ticks[0] += (__dus.s.ticks[0] - ldus->s.ticks[0]); 104 dus->s.ticks[1] += (__dus.s.ticks[1] - ldus->s.ticks[1]); 105 dus->s.io_ticks += (__dus.s.io_ticks - ldus->s.io_ticks); 106 dus->s.time_in_queue += (__dus.s.time_in_queue - ldus->s.time_in_queue); 107 108 fio_gettime(&t, NULL); 109 dus->s.msec += mtime_since(&du->time, &t); 110 memcpy(&du->time, &t, sizeof(t)); 111 memcpy(&ldus->s, &__dus.s, sizeof(__dus.s)); 112 } 113 114 int update_io_ticks(void) 115 { 116 struct flist_head *entry; 117 struct disk_util *du; 118 int ret = 0; 119 120 dprint(FD_DISKUTIL, "update io ticks\n"); 121 122 fio_mutex_down(disk_util_mutex); 123 124 if (!helper_should_exit()) { 125 flist_for_each(entry, &disk_list) { 126 du = flist_entry(entry, struct disk_util, list); 127 update_io_tick_disk(du); 128 } 129 } else 130 ret = 1; 131 132 fio_mutex_up(disk_util_mutex); 133 return ret; 134 } 135 136 static struct disk_util *disk_util_exists(int major, int minor) 137 { 138 struct flist_head *entry; 139 struct disk_util *du; 140 141 fio_mutex_down(disk_util_mutex); 142 143 flist_for_each(entry, &disk_list) { 144 du = flist_entry(entry, struct disk_util, list); 145 146 if (major == du->major && minor == du->minor) { 147 fio_mutex_up(disk_util_mutex); 148 return du; 149 } 150 } 151 152 fio_mutex_up(disk_util_mutex); 153 return NULL; 154 } 155 156 static int get_device_numbers(char *file_name, int *maj, int *min) 157 { 158 struct stat st; 159 int majdev, mindev; 160 char tempname[PATH_MAX], *p; 161 162 if (!lstat(file_name, &st)) { 163 if (S_ISBLK(st.st_mode)) { 164 majdev = major(st.st_rdev); 165 mindev = minor(st.st_rdev); 166 } else if (S_ISCHR(st.st_mode)) { 167 majdev = major(st.st_rdev); 168 mindev = minor(st.st_rdev); 169 if (fio_lookup_raw(st.st_rdev, &majdev, &mindev)) 170 return -1; 171 } else if (S_ISFIFO(st.st_mode)) 172 return -1; 173 else { 174 majdev = major(st.st_dev); 175 mindev = minor(st.st_dev); 176 } 177 } else { 178 /* 179 * must be a file, open "." in that path 180 */ 181 tempname[PATH_MAX - 1] = '\0'; 182 strncpy(tempname, file_name, PATH_MAX - 1); 183 p = dirname(tempname); 184 if (stat(p, &st)) { 185 perror("disk util stat"); 186 return -1; 187 } 188 189 majdev = major(st.st_dev); 190 mindev = minor(st.st_dev); 191 } 192 193 *min = mindev; 194 *maj = majdev; 195 196 return 0; 197 } 198 199 static int read_block_dev_entry(char *path, int *maj, int *min) 200 { 201 char line[256], *p; 202 FILE *f; 203 204 f = fopen(path, "r"); 205 if (!f) { 206 perror("open path"); 207 return 1; 208 } 209 210 p = fgets(line, sizeof(line), f); 211 fclose(f); 212 213 if (!p) 214 return 1; 215 216 if (sscanf(p, "%u:%u", maj, min) != 2) 217 return 1; 218 219 return 0; 220 } 221 222 static void find_add_disk_slaves(struct thread_data *td, char *path, 223 struct disk_util *masterdu) 224 { 225 DIR *dirhandle = NULL; 226 struct dirent *dirent = NULL; 227 char slavesdir[PATH_MAX], temppath[PATH_MAX], slavepath[PATH_MAX]; 228 struct disk_util *slavedu = NULL; 229 int majdev, mindev; 230 ssize_t linklen; 231 232 sprintf(slavesdir, "%s/%s", path, "slaves"); 233 dirhandle = opendir(slavesdir); 234 if (!dirhandle) 235 return; 236 237 while ((dirent = readdir(dirhandle)) != NULL) { 238 if (!strcmp(dirent->d_name, ".") || 239 !strcmp(dirent->d_name, "..")) 240 continue; 241 242 sprintf(temppath, "%s/%s", slavesdir, dirent->d_name); 243 /* Can we always assume that the slaves device entries 244 * are links to the real directories for the slave 245 * devices? 246 */ 247 linklen = readlink(temppath, slavepath, PATH_MAX - 1); 248 if (linklen < 0) { 249 perror("readlink() for slave device."); 250 closedir(dirhandle); 251 return; 252 } 253 slavepath[linklen] = '\0'; 254 255 sprintf(temppath, "%s/%s/dev", slavesdir, slavepath); 256 if (read_block_dev_entry(temppath, &majdev, &mindev)) { 257 perror("Error getting slave device numbers."); 258 closedir(dirhandle); 259 return; 260 } 261 262 /* 263 * See if this maj,min already exists 264 */ 265 slavedu = disk_util_exists(majdev, mindev); 266 if (slavedu) 267 continue; 268 269 sprintf(temppath, "%s/%s", slavesdir, slavepath); 270 __init_per_file_disk_util(td, majdev, mindev, temppath); 271 slavedu = disk_util_exists(majdev, mindev); 272 273 /* Should probably use an assert here. slavedu should 274 * always be present at this point. */ 275 if (slavedu) { 276 slavedu->users++; 277 flist_add_tail(&slavedu->slavelist, &masterdu->slaves); 278 } 279 } 280 281 closedir(dirhandle); 282 } 283 284 static struct disk_util *disk_util_add(struct thread_data *td, int majdev, 285 int mindev, char *path) 286 { 287 struct disk_util *du, *__du; 288 struct flist_head *entry; 289 int l; 290 291 dprint(FD_DISKUTIL, "add maj/min %d/%d: %s\n", majdev, mindev, path); 292 293 du = smalloc(sizeof(*du)); 294 if (!du) 295 return NULL; 296 297 memset(du, 0, sizeof(*du)); 298 INIT_FLIST_HEAD(&du->list); 299 l = snprintf(du->path, sizeof(du->path), "%s/stat", path); 300 if (l < 0 || l >= sizeof(du->path)) { 301 log_err("constructed path \"%.100s[...]/stat\" larger than buffer (%zu bytes)\n", 302 path, sizeof(du->path) - 1); 303 sfree(du); 304 return NULL; 305 } 306 strncpy((char *) du->dus.name, basename(path), FIO_DU_NAME_SZ - 1); 307 du->sysfs_root = strdup(path); 308 du->major = majdev; 309 du->minor = mindev; 310 INIT_FLIST_HEAD(&du->slavelist); 311 INIT_FLIST_HEAD(&du->slaves); 312 du->lock = fio_mutex_init(FIO_MUTEX_UNLOCKED); 313 du->users = 0; 314 315 fio_mutex_down(disk_util_mutex); 316 317 flist_for_each(entry, &disk_list) { 318 __du = flist_entry(entry, struct disk_util, list); 319 320 dprint(FD_DISKUTIL, "found %s in list\n", __du->dus.name); 321 322 if (!strcmp((char *) du->dus.name, (char *) __du->dus.name)) { 323 disk_util_free(du); 324 fio_mutex_up(disk_util_mutex); 325 return __du; 326 } 327 } 328 329 dprint(FD_DISKUTIL, "add %s to list\n", du->dus.name); 330 331 fio_gettime(&du->time, NULL); 332 get_io_ticks(du, &du->last_dus); 333 334 flist_add_tail(&du->list, &disk_list); 335 fio_mutex_up(disk_util_mutex); 336 337 find_add_disk_slaves(td, path, du); 338 return du; 339 } 340 341 static int check_dev_match(int majdev, int mindev, char *path) 342 { 343 int major, minor; 344 345 if (read_block_dev_entry(path, &major, &minor)) 346 return 1; 347 348 if (majdev == major && mindev == minor) 349 return 0; 350 351 return 1; 352 } 353 354 static int find_block_dir(int majdev, int mindev, char *path, int link_ok) 355 { 356 struct dirent *dir; 357 struct stat st; 358 int found = 0; 359 DIR *D; 360 361 D = opendir(path); 362 if (!D) 363 return 0; 364 365 while ((dir = readdir(D)) != NULL) { 366 char full_path[256]; 367 368 if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) 369 continue; 370 371 sprintf(full_path, "%s/%s", path, dir->d_name); 372 373 if (!strcmp(dir->d_name, "dev")) { 374 if (!check_dev_match(majdev, mindev, full_path)) { 375 found = 1; 376 break; 377 } 378 } 379 380 if (link_ok) { 381 if (stat(full_path, &st) == -1) { 382 perror("stat"); 383 break; 384 } 385 } else { 386 if (lstat(full_path, &st) == -1) { 387 perror("stat"); 388 break; 389 } 390 } 391 392 if (!S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) 393 continue; 394 395 found = find_block_dir(majdev, mindev, full_path, 0); 396 if (found) { 397 strcpy(path, full_path); 398 break; 399 } 400 } 401 402 closedir(D); 403 return found; 404 } 405 406 static struct disk_util *__init_per_file_disk_util(struct thread_data *td, 407 int majdev, int mindev, 408 char *path) 409 { 410 struct stat st; 411 char tmp[PATH_MAX]; 412 char *p; 413 414 /* 415 * If there's a ../queue/ directory there, we are inside a partition. 416 * Check if that is the case and jump back. For loop/md/dm etc we 417 * are already in the right spot. 418 */ 419 sprintf(tmp, "%s/../queue", path); 420 if (!stat(tmp, &st)) { 421 p = dirname(path); 422 sprintf(tmp, "%s/queue", p); 423 if (stat(tmp, &st)) { 424 log_err("unknown sysfs layout\n"); 425 return NULL; 426 } 427 tmp[PATH_MAX - 1] = '\0'; 428 strncpy(tmp, p, PATH_MAX - 1); 429 sprintf(path, "%s", tmp); 430 } 431 432 return disk_util_add(td, majdev, mindev, path); 433 } 434 435 static struct disk_util *init_per_file_disk_util(struct thread_data *td, 436 char *filename) 437 { 438 439 char foo[PATH_MAX]; 440 struct disk_util *du; 441 int mindev, majdev; 442 443 if (get_device_numbers(filename, &majdev, &mindev)) 444 return NULL; 445 446 dprint(FD_DISKUTIL, "%s belongs to maj/min %d/%d\n", filename, majdev, 447 mindev); 448 449 du = disk_util_exists(majdev, mindev); 450 if (du) 451 return du; 452 453 /* 454 * for an fs without a device, we will repeatedly stat through 455 * sysfs which can take oodles of time for thousands of files. so 456 * cache the last lookup and compare with that before going through 457 * everything again. 458 */ 459 if (mindev == last_mindev && majdev == last_majdev) 460 return last_du; 461 462 last_mindev = mindev; 463 last_majdev = majdev; 464 465 sprintf(foo, "/sys/block"); 466 if (!find_block_dir(majdev, mindev, foo, 1)) 467 return NULL; 468 469 return __init_per_file_disk_util(td, majdev, mindev, foo); 470 } 471 472 static struct disk_util *__init_disk_util(struct thread_data *td, 473 struct fio_file *f) 474 { 475 return init_per_file_disk_util(td, f->file_name); 476 } 477 478 void init_disk_util(struct thread_data *td) 479 { 480 struct fio_file *f; 481 unsigned int i; 482 483 if (!td->o.do_disk_util || 484 td_ioengine_flagged(td, FIO_DISKLESSIO | FIO_NODISKUTIL)) 485 return; 486 487 for_each_file(td, f, i) 488 f->du = __init_disk_util(td, f); 489 } 490 491 static void show_agg_stats(struct disk_util_agg *agg, int terse, 492 struct buf_output *out) 493 { 494 if (!agg->slavecount) 495 return; 496 497 if (!terse) { 498 log_buf(out, ", aggrios=%llu/%llu, aggrmerge=%llu/%llu, " 499 "aggrticks=%llu/%llu, aggrin_queue=%llu, " 500 "aggrutil=%3.2f%%", 501 (unsigned long long) agg->ios[0] / agg->slavecount, 502 (unsigned long long) agg->ios[1] / agg->slavecount, 503 (unsigned long long) agg->merges[0] / agg->slavecount, 504 (unsigned long long) agg->merges[1] / agg->slavecount, 505 (unsigned long long) agg->ticks[0] / agg->slavecount, 506 (unsigned long long) agg->ticks[1] / agg->slavecount, 507 (unsigned long long) agg->time_in_queue / agg->slavecount, 508 agg->max_util.u.f); 509 } else { 510 log_buf(out, ";slaves;%llu;%llu;%llu;%llu;%llu;%llu;%llu;%3.2f%%", 511 (unsigned long long) agg->ios[0] / agg->slavecount, 512 (unsigned long long) agg->ios[1] / agg->slavecount, 513 (unsigned long long) agg->merges[0] / agg->slavecount, 514 (unsigned long long) agg->merges[1] / agg->slavecount, 515 (unsigned long long) agg->ticks[0] / agg->slavecount, 516 (unsigned long long) agg->ticks[1] / agg->slavecount, 517 (unsigned long long) agg->time_in_queue / agg->slavecount, 518 agg->max_util.u.f); 519 } 520 } 521 522 static void aggregate_slaves_stats(struct disk_util *masterdu) 523 { 524 struct disk_util_agg *agg = &masterdu->agg; 525 struct disk_util_stat *dus; 526 struct flist_head *entry; 527 struct disk_util *slavedu; 528 double util; 529 530 flist_for_each(entry, &masterdu->slaves) { 531 slavedu = flist_entry(entry, struct disk_util, slavelist); 532 dus = &slavedu->dus; 533 agg->ios[0] += dus->s.ios[0]; 534 agg->ios[1] += dus->s.ios[1]; 535 agg->merges[0] += dus->s.merges[0]; 536 agg->merges[1] += dus->s.merges[1]; 537 agg->sectors[0] += dus->s.sectors[0]; 538 agg->sectors[1] += dus->s.sectors[1]; 539 agg->ticks[0] += dus->s.ticks[0]; 540 agg->ticks[1] += dus->s.ticks[1]; 541 agg->time_in_queue += dus->s.time_in_queue; 542 agg->slavecount++; 543 544 util = (double) (100 * dus->s.io_ticks / (double) slavedu->dus.s.msec); 545 /* System utilization is the utilization of the 546 * component with the highest utilization. 547 */ 548 if (util > agg->max_util.u.f) 549 agg->max_util.u.f = util; 550 551 } 552 553 if (agg->max_util.u.f > 100.0) 554 agg->max_util.u.f = 100.0; 555 } 556 557 void disk_util_prune_entries(void) 558 { 559 fio_mutex_down(disk_util_mutex); 560 561 while (!flist_empty(&disk_list)) { 562 struct disk_util *du; 563 564 du = flist_first_entry(&disk_list, struct disk_util, list); 565 flist_del(&du->list); 566 disk_util_free(du); 567 } 568 569 last_majdev = last_mindev = -1; 570 fio_mutex_up(disk_util_mutex); 571 fio_mutex_remove(disk_util_mutex); 572 } 573 574 void print_disk_util(struct disk_util_stat *dus, struct disk_util_agg *agg, 575 int terse, struct buf_output *out) 576 { 577 double util = 0; 578 579 if (dus->s.msec) 580 util = (double) 100 * dus->s.io_ticks / (double) dus->s.msec; 581 if (util > 100.0) 582 util = 100.0; 583 584 if (!terse) { 585 if (agg->slavecount) 586 log_buf(out, " "); 587 588 log_buf(out, " %s: ios=%llu/%llu, merge=%llu/%llu, " 589 "ticks=%llu/%llu, in_queue=%llu, util=%3.2f%%", 590 dus->name, 591 (unsigned long long) dus->s.ios[0], 592 (unsigned long long) dus->s.ios[1], 593 (unsigned long long) dus->s.merges[0], 594 (unsigned long long) dus->s.merges[1], 595 (unsigned long long) dus->s.ticks[0], 596 (unsigned long long) dus->s.ticks[1], 597 (unsigned long long) dus->s.time_in_queue, 598 util); 599 } else { 600 log_buf(out, ";%s;%llu;%llu;%llu;%llu;%llu;%llu;%llu;%3.2f%%", 601 dus->name, 602 (unsigned long long) dus->s.ios[0], 603 (unsigned long long) dus->s.ios[1], 604 (unsigned long long) dus->s.merges[0], 605 (unsigned long long) dus->s.merges[1], 606 (unsigned long long) dus->s.ticks[0], 607 (unsigned long long) dus->s.ticks[1], 608 (unsigned long long) dus->s.time_in_queue, 609 util); 610 } 611 612 /* 613 * If the device has slaves, aggregate the stats for 614 * those slave devices also. 615 */ 616 show_agg_stats(agg, terse, out); 617 618 if (!terse) 619 log_buf(out, "\n"); 620 } 621 622 void json_array_add_disk_util(struct disk_util_stat *dus, 623 struct disk_util_agg *agg, struct json_array *array) 624 { 625 struct json_object *obj; 626 double util = 0; 627 628 if (dus->s.msec) 629 util = (double) 100 * dus->s.io_ticks / (double) dus->s.msec; 630 if (util > 100.0) 631 util = 100.0; 632 633 obj = json_create_object(); 634 json_array_add_value_object(array, obj); 635 636 json_object_add_value_string(obj, "name", dus->name); 637 json_object_add_value_int(obj, "read_ios", dus->s.ios[0]); 638 json_object_add_value_int(obj, "write_ios", dus->s.ios[1]); 639 json_object_add_value_int(obj, "read_merges", dus->s.merges[0]); 640 json_object_add_value_int(obj, "write_merges", dus->s.merges[1]); 641 json_object_add_value_int(obj, "read_ticks", dus->s.ticks[0]); 642 json_object_add_value_int(obj, "write_ticks", dus->s.ticks[1]); 643 json_object_add_value_int(obj, "in_queue", dus->s.time_in_queue); 644 json_object_add_value_float(obj, "util", util); 645 646 /* 647 * If the device has slaves, aggregate the stats for 648 * those slave devices also. 649 */ 650 if (!agg->slavecount) 651 return; 652 json_object_add_value_int(obj, "aggr_read_ios", 653 agg->ios[0] / agg->slavecount); 654 json_object_add_value_int(obj, "aggr_write_ios", 655 agg->ios[1] / agg->slavecount); 656 json_object_add_value_int(obj, "aggr_read_merges", 657 agg->merges[0] / agg->slavecount); 658 json_object_add_value_int(obj, "aggr_write_merge", 659 agg->merges[1] / agg->slavecount); 660 json_object_add_value_int(obj, "aggr_read_ticks", 661 agg->ticks[0] / agg->slavecount); 662 json_object_add_value_int(obj, "aggr_write_ticks", 663 agg->ticks[1] / agg->slavecount); 664 json_object_add_value_int(obj, "aggr_in_queue", 665 agg->time_in_queue / agg->slavecount); 666 json_object_add_value_float(obj, "aggr_util", agg->max_util.u.f); 667 } 668 669 static void json_object_add_disk_utils(struct json_object *obj, 670 struct flist_head *head) 671 { 672 struct json_array *array = json_create_array(); 673 struct flist_head *entry; 674 struct disk_util *du; 675 676 json_object_add_value_array(obj, "disk_util", array); 677 678 flist_for_each(entry, head) { 679 du = flist_entry(entry, struct disk_util, list); 680 681 aggregate_slaves_stats(du); 682 json_array_add_disk_util(&du->dus, &du->agg, array); 683 } 684 } 685 686 void show_disk_util(int terse, struct json_object *parent, 687 struct buf_output *out) 688 { 689 struct flist_head *entry; 690 struct disk_util *du; 691 bool do_json; 692 693 if (!disk_util_mutex) 694 return; 695 696 fio_mutex_down(disk_util_mutex); 697 698 if (flist_empty(&disk_list)) { 699 fio_mutex_up(disk_util_mutex); 700 return; 701 } 702 703 if ((output_format & FIO_OUTPUT_JSON) && parent) 704 do_json = true; 705 else 706 do_json = false; 707 708 if (!terse && !do_json) 709 log_buf(out, "\nDisk stats (read/write):\n"); 710 711 if (do_json) 712 json_object_add_disk_utils(parent, &disk_list); 713 else if (output_format & ~(FIO_OUTPUT_JSON | FIO_OUTPUT_JSON_PLUS)) { 714 flist_for_each(entry, &disk_list) { 715 du = flist_entry(entry, struct disk_util, list); 716 717 aggregate_slaves_stats(du); 718 print_disk_util(&du->dus, &du->agg, terse, out); 719 } 720 } 721 722 fio_mutex_up(disk_util_mutex); 723 } 724 725 void setup_disk_util(void) 726 { 727 disk_util_mutex = fio_mutex_init(FIO_MUTEX_UNLOCKED); 728 } 729