/*
 * Code related to writing an iolog of what a thread is doing, and to
 * later read that back and replay
 */
#include <stdio.h>
#include <stdlib.h>
#include <libgen.h>
#include <assert.h>
#include "flist.h"
#include "fio.h"
#include "verify.h"
#include "trim.h"
#include "filelock.h"

static const char iolog_ver2[] = "fio version 2 iolog";

void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
{
	flist_add_tail(&ipo->list, &td->io_log_list);
	td->total_io_size += ipo->len;
}

void log_io_u(struct thread_data *td, struct io_u *io_u)
{
	const char *act[] = { "read", "write", "sync", "datasync",
				"sync_file_range", "wait", "trim" };

	assert(io_u->ddir <= 6);

	if (!td->o.write_iolog_file)
		return;

	fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
						act[io_u->ddir], io_u->offset,
						io_u->buflen);
}

void log_file(struct thread_data *td, struct fio_file *f,
	      enum file_log_act what)
{
	const char *act[] = { "add", "open", "close" };

	assert(what < 3);

	if (!td->o.write_iolog_file)
		return;

	/*
	 * this happens on the pre-open/close done before the job starts
	 */
	if (!td->iolog_f)
		return;

	fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
}

static void iolog_delay(struct thread_data *td, unsigned long delay)
{
	unsigned long usec = utime_since_now(&td->last_issue);
	unsigned long this_delay;

	if (delay < usec)
		return;

	delay -= usec;

	/*
	 * less than 100 usec delay, just regard it as noise
	 */
	if (delay < 100)
		return;

	while (delay && !td->terminate) {
		this_delay = delay;
		if (this_delay > 500000)
			this_delay = 500000;

		usec_sleep(td, this_delay);
		delay -= this_delay;
	}
}

static int ipo_special(struct thread_data *td, struct io_piece *ipo)
{
	struct fio_file *f;
	int ret;

	/*
	 * Not a special ipo
	 */
	if (ipo->ddir != DDIR_INVAL)
		return 0;

	f = td->files[ipo->fileno];

	switch (ipo->file_action) {
	case FIO_LOG_OPEN_FILE:
		ret = td_io_open_file(td, f);
		if (!ret)
			break;
		td_verror(td, ret, "iolog open file");
		return -1;
	case FIO_LOG_CLOSE_FILE:
		td_io_close_file(td, f);
		break;
	case FIO_LOG_UNLINK_FILE:
		unlink(f->file_name);
		break;
	default:
		log_err("fio: bad file action %d\n", ipo->file_action);
		break;
	}

	return 1;
}

int read_iolog_get(struct thread_data *td, struct io_u *io_u)
{
	struct io_piece *ipo;
	unsigned long elapsed;

	while (!flist_empty(&td->io_log_list)) {
		int ret;

		ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
		flist_del(&ipo->list);
		remove_trim_entry(td, ipo);

		ret = ipo_special(td, ipo);
		if (ret < 0) {
			free(ipo);
			break;
		} else if (ret > 0) {
			free(ipo);
			continue;
		}

		io_u->ddir = ipo->ddir;
		if (ipo->ddir != DDIR_WAIT) {
			io_u->offset = ipo->offset;
			io_u->buflen = ipo->len;
			io_u->file = td->files[ipo->fileno];
			get_file(io_u->file);
			dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
					io_u->buflen, io_u->file->file_name);
			if (ipo->delay)
				iolog_delay(td, ipo->delay);
		} else {
			elapsed = mtime_since_genesis();
			if (ipo->delay > elapsed)
				usec_sleep(td, (ipo->delay - elapsed) * 1000);
		}

		free(ipo);

		if (io_u->ddir != DDIR_WAIT)
			return 0;
	}

	td->done = 1;
	return 1;
}
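
/*
 * For reference, a version 2 iolog as emitted by log_file()/log_io_u()
 * above (and parsed by read_iolog2() further down) is plain text: the
 * iolog_ver2 header line, followed by one action per line. A sketch of
 * what such a file might look like (the file name, offsets and lengths
 * here are made up for illustration):
 *
 *	fio version 2 iolog
 *	/dev/sdb add
 *	/dev/sdb open
 *	/dev/sdb write 0 4096
 *	/dev/sdb write 4096 4096
 *	/dev/sdb read 0 4096
 *	/dev/sdb close
 */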
void prune_io_piece_log(struct thread_data *td)
{
	struct io_piece *ipo;
	struct rb_node *n;

	while ((n = rb_first(&td->io_hist_tree)) != NULL) {
		ipo = rb_entry(n, struct io_piece, rb_node);
		rb_erase(n, &td->io_hist_tree);
		remove_trim_entry(td, ipo);
		td->io_hist_len--;
		free(ipo);
	}

	while (!flist_empty(&td->io_hist_list)) {
		ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
		flist_del(&ipo->list);
		remove_trim_entry(td, ipo);
		td->io_hist_len--;
		free(ipo);
	}
}

/*
 * log a successful write, so we can unwind the log for verify
 */
void log_io_piece(struct thread_data *td, struct io_u *io_u)
{
	struct rb_node **p, *parent;
	struct io_piece *ipo, *__ipo;

	ipo = malloc(sizeof(struct io_piece));
	init_ipo(ipo);
	ipo->file = io_u->file;
	ipo->offset = io_u->offset;
	ipo->len = io_u->buflen;
	ipo->numberio = io_u->numberio;
	ipo->flags = IP_F_IN_FLIGHT;

	io_u->ipo = ipo;

	if (io_u_should_trim(td, io_u)) {
		flist_add_tail(&ipo->trim_list, &td->trim_list);
		td->trim_entries++;
	}

	/*
	 * We don't need to sort the entries, if:
	 *
	 *	Sequential writes, or
	 *	Random writes that lay out the file as it goes along
	 *
	 * For both these cases, just reading back data in the order we
	 * wrote it out is the fastest.
	 *
	 * One exception is if we don't have a random map AND we are doing
	 * verifies, in that case we need to check for duplicate blocks and
	 * drop the old one, which we rely on the rb insert/lookup for
	 * handling.
	 */
	if (((!td->o.verifysort) || !td_random(td) || !td->o.overwrite) &&
	    (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
		INIT_FLIST_HEAD(&ipo->list);
		flist_add_tail(&ipo->list, &td->io_hist_list);
		ipo->flags |= IP_F_ONLIST;
		td->io_hist_len++;
		return;
	}

	RB_CLEAR_NODE(&ipo->rb_node);

	/*
	 * Sort the entry into the verification list
	 */
restart:
	p = &td->io_hist_tree.rb_node;
	parent = NULL;
	while (*p) {
		parent = *p;

		__ipo = rb_entry(parent, struct io_piece, rb_node);
		if (ipo->file < __ipo->file)
			p = &(*p)->rb_left;
		else if (ipo->file > __ipo->file)
			p = &(*p)->rb_right;
		else if (ipo->offset < __ipo->offset)
			p = &(*p)->rb_left;
		else if (ipo->offset > __ipo->offset)
			p = &(*p)->rb_right;
		else {
			dprint(FD_IO, "iolog: overlap %llu/%lu, %llu/%lu",
				__ipo->offset, __ipo->len,
				ipo->offset, ipo->len);
			td->io_hist_len--;
			rb_erase(parent, &td->io_hist_tree);
			remove_trim_entry(td, __ipo);
			free(__ipo);
			goto restart;
		}
	}

	rb_link_node(&ipo->rb_node, parent, p);
	rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
	ipo->flags |= IP_F_ONRB;
	td->io_hist_len++;
}

void unlog_io_piece(struct thread_data *td, struct io_u *io_u)
{
	struct io_piece *ipo = io_u->ipo;

	if (!ipo)
		return;

	if (ipo->flags & IP_F_ONRB)
		rb_erase(&ipo->rb_node, &td->io_hist_tree);
	else if (ipo->flags & IP_F_ONLIST)
		flist_del(&ipo->list);

	free(ipo);
	io_u->ipo = NULL;
	td->io_hist_len--;
}

void trim_io_piece(struct thread_data *td, struct io_u *io_u)
{
	struct io_piece *ipo = io_u->ipo;

	if (!ipo)
		return;

	ipo->len = io_u->xfer_buflen - io_u->resid;
}
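
/*
 * Rough lifecycle of a verify-history io_piece, as implemented above:
 * log_io_piece() allocates it with IP_F_IN_FLIGHT set and stores it either
 * on the plain io_hist_list (IP_F_ONLIST) or in the sorted io_hist_tree
 * (IP_F_ONRB) when overlapping blocks must be deduplicated. unlog_io_piece()
 * checks those flags to know which container to drop it from when the piece
 * must be discarded, trim_io_piece() shrinks its length to the bytes that
 * actually transferred (xfer_buflen - resid), and prune_io_piece_log()
 * tears the whole history down.
 */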
void write_iolog_close(struct thread_data *td)
{
	fflush(td->iolog_f);
	fclose(td->iolog_f);
	free(td->iolog_buf);
	td->iolog_f = NULL;
	td->iolog_buf = NULL;
}

/*
 * Read version 2 iolog data. It is enhanced to include per-file logging,
 * syncs, etc.
 */
static int read_iolog2(struct thread_data *td, FILE *f)
{
	unsigned long long offset;
	unsigned int bytes;
	int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
	char *fname, *act;
	char *str, *p;
	enum fio_ddir rw;

	free_release_files(td);

	/*
	 * Read in the read iolog and store it, reuse the infrastructure
	 * for doing verifications.
	 */
	str = malloc(4096);
	fname = malloc(256+16);
	act = malloc(256+16);

	reads = writes = waits = 0;
	while ((p = fgets(str, 4096, f)) != NULL) {
		struct io_piece *ipo;
		int r;

		r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
								&bytes);
		if (r == 4) {
			/*
			 * Check action first
			 */
			if (!strcmp(act, "wait"))
				rw = DDIR_WAIT;
			else if (!strcmp(act, "read"))
				rw = DDIR_READ;
			else if (!strcmp(act, "write"))
				rw = DDIR_WRITE;
			else if (!strcmp(act, "sync"))
				rw = DDIR_SYNC;
			else if (!strcmp(act, "datasync"))
				rw = DDIR_DATASYNC;
			else if (!strcmp(act, "trim"))
				rw = DDIR_TRIM;
			else {
				log_err("fio: bad iolog file action: %s\n",
									act);
				continue;
			}
			fileno = get_fileno(td, fname);
		} else if (r == 2) {
			rw = DDIR_INVAL;
			if (!strcmp(act, "add")) {
				fileno = add_file(td, fname, 0, 1);
				file_action = FIO_LOG_ADD_FILE;
				continue;
			} else if (!strcmp(act, "open")) {
				fileno = get_fileno(td, fname);
				file_action = FIO_LOG_OPEN_FILE;
			} else if (!strcmp(act, "close")) {
				fileno = get_fileno(td, fname);
				file_action = FIO_LOG_CLOSE_FILE;
			} else {
				log_err("fio: bad iolog file action: %s\n",
									act);
				continue;
			}
		} else {
			log_err("bad iolog2: %s", p);
			continue;
		}

		if (rw == DDIR_READ)
			reads++;
		else if (rw == DDIR_WRITE) {
			/*
			 * Don't add a write for ro mode
			 */
			if (read_only)
				continue;
			writes++;
		} else if (rw == DDIR_WAIT) {
			waits++;
		} else if (rw == DDIR_INVAL) {
		} else if (!ddir_sync(rw)) {
			log_err("bad ddir: %d\n", rw);
			continue;
		}

		/*
		 * Make note of file
		 */
		ipo = malloc(sizeof(*ipo));
		init_ipo(ipo);
		ipo->ddir = rw;
		if (rw == DDIR_WAIT) {
			ipo->delay = offset;
		} else {
			ipo->offset = offset;
			ipo->len = bytes;
			if (rw != DDIR_INVAL && bytes > td->o.max_bs[rw])
				td->o.max_bs[rw] = bytes;
			ipo->fileno = fileno;
			ipo->file_action = file_action;
		}

		queue_io_piece(td, ipo);
	}

	free(str);
	free(act);
	free(fname);

	if (writes && read_only) {
		log_err("fio: <%s> skips replay of %d writes due to"
			" read-only\n", td->o.name, writes);
		writes = 0;
	}

	if (!reads && !writes && !waits)
		return 1;
	else if (reads && !writes)
		td->o.td_ddir = TD_DDIR_READ;
	else if (!reads && writes)
		td->o.td_ddir = TD_DDIR_WRITE;
	else
		td->o.td_ddir = TD_DDIR_RW;

	return 0;
}

/*
 * open iolog, check version, and call appropriate parser
 */
static int init_iolog_read(struct thread_data *td)
{
	char buffer[256], *p;
	FILE *f;
	int ret;

	f = fopen(td->o.read_iolog_file, "r");
	if (!f) {
		perror("fopen read iolog");
		return 1;
	}

	p = fgets(buffer, sizeof(buffer), f);
	if (!p) {
		td_verror(td, errno, "iolog read");
		log_err("fio: unable to read iolog\n");
		fclose(f);
		return 1;
	}

	/*
	 * version 2 of the iolog stores a specific string as the
	 * first line, check for that
	 */
	if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
		ret = read_iolog2(td, f);
	else {
		log_err("fio: iolog version 1 is no longer supported\n");
		ret = 1;
	}

	fclose(f);
	return ret;
}
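
/*
 * A minimal job file sketch tying the replay parser above to the capture
 * setup below, assuming the usual fio option names write_iolog= (handled
 * by init_iolog_write()) and read_iolog= (handled by init_iolog_read());
 * the job, device and log file names are made up for illustration:
 *
 *	[capture]
 *	filename=/dev/sdb
 *	rw=randwrite
 *	write_iolog=sdb.iolog
 *
 *	[replay]
 *	read_iolog=sdb.iolog
 */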
/*
 * Set up a log for storing io patterns.
 */
static int init_iolog_write(struct thread_data *td)
{
	struct fio_file *ff;
	FILE *f;
	unsigned int i;

	f = fopen(td->o.write_iolog_file, "a");
	if (!f) {
		perror("fopen write iolog");
		return 1;
	}

	/*
	 * That's it for writing, setup a log buffer and we're done.
	 */
	td->iolog_f = f;
	td->iolog_buf = malloc(8192);
	setvbuf(f, td->iolog_buf, _IOFBF, 8192);

	/*
	 * write our version line
	 */
	if (fprintf(f, "%s\n", iolog_ver2) < 0) {
		perror("iolog init");
		return 1;
	}

	/*
	 * add all known files
	 */
	for_each_file(td, ff, i)
		log_file(td, ff, FIO_LOG_ADD_FILE);

	return 0;
}

int init_iolog(struct thread_data *td)
{
	int ret = 0;

	if (td->o.read_iolog_file) {
		int need_swap;

		/*
		 * Check if it's a blktrace file and load that if possible.
		 * Otherwise assume it's a normal log file and load that.
		 */
		if (is_blktrace(td->o.read_iolog_file, &need_swap))
			ret = load_blktrace(td, td->o.read_iolog_file, need_swap);
		else
			ret = init_iolog_read(td);
	} else if (td->o.write_iolog_file)
		ret = init_iolog_write(td);

	if (ret)
		td_verror(td, EINVAL, "failed initializing iolog");

	return ret;
}

void setup_log(struct io_log **log, unsigned long avg_msec, int log_type)
{
	struct io_log *l = malloc(sizeof(*l));

	memset(l, 0, sizeof(*l));
	l->nr_samples = 0;
	l->max_samples = 1024;
	l->log_type = log_type;
	l->log = malloc(l->max_samples * sizeof(struct io_sample));
	l->avg_msec = avg_msec;
	*log = l;
}

#ifdef CONFIG_SETVBUF
static void *set_file_buffer(FILE *f)
{
	size_t size = 1048576;
	void *buf;

	buf = malloc(size);
	setvbuf(f, buf, _IOFBF, size);
	return buf;
}

static void clear_file_buffer(void *buf)
{
	free(buf);
}
#else
static void *set_file_buffer(FILE *f)
{
	return NULL;
}

static void clear_file_buffer(void *buf)
{
}
#endif

void __finish_log(struct io_log *log, const char *name)
{
	unsigned int i;
	void *buf;
	FILE *f;

	f = fopen(name, "a");
	if (!f) {
		perror("fopen log");
		return;
	}

	buf = set_file_buffer(f);

	for (i = 0; i < log->nr_samples; i++) {
		fprintf(f, "%lu, %lu, %u, %u\n",
				(unsigned long) log->log[i].time,
				(unsigned long) log->log[i].val,
				log->log[i].ddir, log->log[i].bs);
	}

	fclose(f);
	clear_file_buffer(buf);
	free(log->log);
	free(log);
}

static int finish_log_named(struct thread_data *td, struct io_log *log,
			    const char *prefix, const char *postfix,
			    int trylock)
{
	char file_name[256];

	snprintf(file_name, sizeof(file_name), "%s_%s.log", prefix, postfix);

	if (trylock) {
		if (fio_trylock_file(file_name))
			return 1;
	} else
		fio_lock_file(file_name);

	if (td->client_type == FIO_CLIENT_TYPE_GUI) {
		fio_send_iolog(td, log, file_name);
		free(log->log);
		free(log);
	} else
		__finish_log(log, file_name);

	fio_unlock_file(file_name);
	return 0;
}
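
/*
 * For reference, __finish_log() above emits one comma-separated sample per
 * line: timestamp, logged value, data direction and block size, so a single
 * entry might look roughly like
 *
 *	1000, 123, 0, 4096
 *
 * (the numbers here are made up for illustration).
 */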
static int finish_log(struct thread_data *td, struct io_log *log,
		      const char *name, int trylock)
{
	return finish_log_named(td, log, td->o.name, name, trylock);
}

static int write_this_log(struct thread_data *td, struct io_log *log,
			  const char *log_file, const char *name, int try)
{
	int ret;

	if (!log)
		return 0;

	if (log_file)
		ret = finish_log_named(td, log, log_file, name, try);
	else
		ret = finish_log(td, log, name, try);

	return ret;
}

static int write_iops_log(struct thread_data *td, int try)
{
	struct thread_options *o = &td->o;

	return write_this_log(td, td->iops_log, o->iops_log_file, "iops", try);
}

static int write_slat_log(struct thread_data *td, int try)
{
	struct thread_options *o = &td->o;

	return write_this_log(td, td->slat_log, o->lat_log_file, "slat", try);
}

static int write_clat_log(struct thread_data *td, int try)
{
	struct thread_options *o = &td->o;

	return write_this_log(td, td->clat_log, o->lat_log_file, "clat", try);
}

static int write_lat_log(struct thread_data *td, int try)
{
	struct thread_options *o = &td->o;

	return write_this_log(td, td->lat_log, o->lat_log_file, "lat", try);
}

static int write_bandw_log(struct thread_data *td, int try)
{
	struct thread_options *o = &td->o;

	return write_this_log(td, td->bw_log, o->bw_log_file, "bw", try);
}

enum {
	BW_LOG_MASK	= 1,
	LAT_LOG_MASK	= 2,
	SLAT_LOG_MASK	= 4,
	CLAT_LOG_MASK	= 8,
	IOPS_LOG_MASK	= 16,

	ALL_LOG_NR	= 5,
};

struct log_type {
	unsigned int mask;
	int (*fn)(struct thread_data *, int);
};

static struct log_type log_types[] = {
	{
		.mask	= BW_LOG_MASK,
		.fn	= write_bandw_log,
	},
	{
		.mask	= LAT_LOG_MASK,
		.fn	= write_lat_log,
	},
	{
		.mask	= SLAT_LOG_MASK,
		.fn	= write_slat_log,
	},
	{
		.mask	= CLAT_LOG_MASK,
		.fn	= write_clat_log,
	},
	{
		.mask	= IOPS_LOG_MASK,
		.fn	= write_iops_log,
	},
};

void fio_writeout_logs(struct thread_data *td)
{
	unsigned int log_mask = 0;
	unsigned int log_left = ALL_LOG_NR;
	int old_state, i;

	old_state = td_bump_runstate(td, TD_FINISHING);

	finalize_logs(td);

	while (log_left) {
		int prev_log_left = log_left;

		for (i = 0; i < ALL_LOG_NR && log_left; i++) {
			struct log_type *lt = &log_types[i];
			int ret;

			if (!(log_mask & lt->mask)) {
				ret = lt->fn(td, log_left != 1);
				if (!ret) {
					log_left--;
					log_mask |= lt->mask;
				}
			}
		}

		if (prev_log_left == log_left)
			usleep(5000);
	}

	td_restore_runstate(td, old_state);
}