1 /* 2 * The io parts of the fio tool, includes workers for sync and mmap'ed 3 * io, as well as both posix and linux libaio support. 4 * 5 * sync io is implemented on top of aio. 6 * 7 * This is not really specific to fio, if the get_io_u/put_io_u and 8 * structures was pulled into this as well it would be a perfectly 9 * generic io engine that could be used for other projects. 10 * 11 */ 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <unistd.h> 15 #include <string.h> 16 #include <dlfcn.h> 17 #include <fcntl.h> 18 #include <assert.h> 19 20 #include "fio.h" 21 #include "diskutil.h" 22 23 static FLIST_HEAD(engine_list); 24 25 static bool check_engine_ops(struct ioengine_ops *ops) 26 { 27 if (ops->version != FIO_IOOPS_VERSION) { 28 log_err("bad ioops version %d (want %d)\n", ops->version, 29 FIO_IOOPS_VERSION); 30 return true; 31 } 32 33 if (!ops->queue) { 34 log_err("%s: no queue handler\n", ops->name); 35 return true; 36 } 37 38 /* 39 * sync engines only need a ->queue() 40 */ 41 if (ops->flags & FIO_SYNCIO) 42 return false; 43 44 if (!ops->event || !ops->getevents) { 45 log_err("%s: no event/getevents handler\n", ops->name); 46 return true; 47 } 48 49 return false; 50 } 51 52 void unregister_ioengine(struct ioengine_ops *ops) 53 { 54 dprint(FD_IO, "ioengine %s unregistered\n", ops->name); 55 flist_del(&ops->list); 56 INIT_FLIST_HEAD(&ops->list); 57 } 58 59 void register_ioengine(struct ioengine_ops *ops) 60 { 61 dprint(FD_IO, "ioengine %s registered\n", ops->name); 62 INIT_FLIST_HEAD(&ops->list); 63 flist_add_tail(&ops->list, &engine_list); 64 } 65 66 static struct ioengine_ops *find_ioengine(const char *name) 67 { 68 struct ioengine_ops *ops; 69 struct flist_head *entry; 70 71 flist_for_each(entry, &engine_list) { 72 ops = flist_entry(entry, struct ioengine_ops, list); 73 if (!strcmp(name, ops->name)) 74 return ops; 75 } 76 77 return NULL; 78 } 79 80 static struct ioengine_ops *dlopen_ioengine(struct thread_data *td, 81 const char *engine_lib) 82 { 83 struct ioengine_ops *ops; 84 void *dlhandle; 85 86 dprint(FD_IO, "dload engine %s\n", engine_lib); 87 88 dlerror(); 89 dlhandle = dlopen(engine_lib, RTLD_LAZY); 90 if (!dlhandle) { 91 td_vmsg(td, -1, dlerror(), "dlopen"); 92 return NULL; 93 } 94 95 /* 96 * Unlike the included modules, external engines should have a 97 * non-static ioengine structure that we can reference. 98 */ 99 ops = dlsym(dlhandle, engine_lib); 100 if (!ops) 101 ops = dlsym(dlhandle, "ioengine"); 102 103 /* 104 * For some external engines (like C++ ones) it is not that trivial 105 * to provide a non-static ionengine structure that we can reference. 106 * Instead we call a method which allocates the required ioengine 107 * structure. 108 */ 109 if (!ops) { 110 get_ioengine_t get_ioengine = dlsym(dlhandle, "get_ioengine"); 111 112 if (get_ioengine) 113 get_ioengine(&ops); 114 } 115 116 if (!ops) { 117 td_vmsg(td, -1, dlerror(), "dlsym"); 118 dlclose(dlhandle); 119 return NULL; 120 } 121 122 td->io_ops_dlhandle = dlhandle; 123 return ops; 124 } 125 126 struct ioengine_ops *load_ioengine(struct thread_data *td, const char *name) 127 { 128 struct ioengine_ops *ops; 129 char engine[64]; 130 131 dprint(FD_IO, "load ioengine %s\n", name); 132 133 engine[sizeof(engine) - 1] = '\0'; 134 strncpy(engine, name, sizeof(engine) - 1); 135 136 /* 137 * linux libaio has alias names, so convert to what we want 138 */ 139 if (!strncmp(engine, "linuxaio", 8) || !strncmp(engine, "aio", 3)) 140 strcpy(engine, "libaio"); 141 142 ops = find_ioengine(engine); 143 if (!ops) 144 ops = dlopen_ioengine(td, name); 145 146 if (!ops) { 147 log_err("fio: engine %s not loadable\n", name); 148 return NULL; 149 } 150 151 /* 152 * Check that the required methods are there. 153 */ 154 if (check_engine_ops(ops)) 155 return NULL; 156 157 return ops; 158 } 159 160 /* 161 * For cleaning up an ioengine which never made it to init(). 162 */ 163 void free_ioengine(struct thread_data *td) 164 { 165 dprint(FD_IO, "free ioengine %s\n", td->io_ops->name); 166 167 if (td->eo && td->io_ops->options) { 168 options_free(td->io_ops->options, td->eo); 169 free(td->eo); 170 td->eo = NULL; 171 } 172 173 if (td->io_ops_dlhandle) 174 dlclose(td->io_ops_dlhandle); 175 176 td->io_ops = NULL; 177 } 178 179 void close_ioengine(struct thread_data *td) 180 { 181 dprint(FD_IO, "close ioengine %s\n", td->io_ops->name); 182 183 if (td->io_ops->cleanup) { 184 td->io_ops->cleanup(td); 185 td->io_ops_data = NULL; 186 } 187 188 free_ioengine(td); 189 } 190 191 int td_io_prep(struct thread_data *td, struct io_u *io_u) 192 { 193 dprint_io_u(io_u, "prep"); 194 fio_ro_check(td, io_u); 195 196 lock_file(td, io_u->file, io_u->ddir); 197 198 if (td->io_ops->prep) { 199 int ret = td->io_ops->prep(td, io_u); 200 201 dprint(FD_IO, "->prep(%p)=%d\n", io_u, ret); 202 if (ret) 203 unlock_file(td, io_u->file); 204 return ret; 205 } 206 207 return 0; 208 } 209 210 int td_io_getevents(struct thread_data *td, unsigned int min, unsigned int max, 211 const struct timespec *t) 212 { 213 int r = 0; 214 215 /* 216 * For ioengine=rdma one side operation RDMA_WRITE or RDMA_READ, 217 * server side gets a message from the client 218 * side that the task is finished, and 219 * td->done is set to 1 after td_io_commit(). In this case, 220 * there is no need to reap complete event in server side. 221 */ 222 if (td->done) 223 return 0; 224 225 if (min > 0 && td->io_ops->commit) { 226 r = td->io_ops->commit(td); 227 if (r < 0) 228 goto out; 229 } 230 if (max > td->cur_depth) 231 max = td->cur_depth; 232 if (min > max) 233 max = min; 234 235 r = 0; 236 if (max && td->io_ops->getevents) 237 r = td->io_ops->getevents(td, min, max, t); 238 out: 239 if (r >= 0) { 240 /* 241 * Reflect that our submitted requests were retrieved with 242 * whatever OS async calls are in the underlying engine. 243 */ 244 td->io_u_in_flight -= r; 245 io_u_mark_complete(td, r); 246 } else 247 td_verror(td, r, "get_events"); 248 249 dprint(FD_IO, "getevents: %d\n", r); 250 return r; 251 } 252 253 int td_io_queue(struct thread_data *td, struct io_u *io_u) 254 { 255 const enum fio_ddir ddir = acct_ddir(io_u); 256 unsigned long buflen = io_u->xfer_buflen; 257 int ret; 258 259 dprint_io_u(io_u, "queue"); 260 fio_ro_check(td, io_u); 261 262 assert((io_u->flags & IO_U_F_FLIGHT) == 0); 263 io_u_set(td, io_u, IO_U_F_FLIGHT); 264 265 assert(fio_file_open(io_u->file)); 266 267 /* 268 * If using a write iolog, store this entry. 269 */ 270 log_io_u(td, io_u); 271 272 io_u->error = 0; 273 io_u->resid = 0; 274 275 if (td_ioengine_flagged(td, FIO_SYNCIO)) { 276 if (fio_fill_issue_time(td)) 277 fio_gettime(&io_u->issue_time, NULL); 278 279 /* 280 * only used for iolog 281 */ 282 if (td->o.read_iolog_file) 283 memcpy(&td->last_issue, &io_u->issue_time, 284 sizeof(struct timeval)); 285 } 286 287 if (ddir_rw(ddir)) { 288 td->io_issues[ddir]++; 289 td->io_issue_bytes[ddir] += buflen; 290 td->rate_io_issue_bytes[ddir] += buflen; 291 } 292 293 ret = td->io_ops->queue(td, io_u); 294 295 unlock_file(td, io_u->file); 296 297 if (ret == FIO_Q_BUSY && ddir_rw(ddir)) { 298 td->io_issues[ddir]--; 299 td->io_issue_bytes[ddir] -= buflen; 300 td->rate_io_issue_bytes[ddir] -= buflen; 301 io_u_clear(td, io_u, IO_U_F_FLIGHT); 302 } 303 304 /* 305 * If an error was seen and the io engine didn't propagate it 306 * back to 'td', do so. 307 */ 308 if (io_u->error && !td->error) 309 td_verror(td, io_u->error, "td_io_queue"); 310 311 /* 312 * Add warning for O_DIRECT so that users have an easier time 313 * spotting potentially bad alignment. If this triggers for the first 314 * IO, then it's likely an alignment problem or because the host fs 315 * does not support O_DIRECT 316 */ 317 if (io_u->error == EINVAL && td->io_issues[io_u->ddir & 1] == 1 && 318 td->o.odirect) { 319 320 log_info("fio: first direct IO errored. File system may not " 321 "support direct IO, or iomem_align= is bad. Try " 322 "setting direct=0.\n"); 323 } 324 325 if (!td->io_ops->commit || io_u->ddir == DDIR_TRIM) { 326 io_u_mark_submit(td, 1); 327 io_u_mark_complete(td, 1); 328 } 329 330 if (ret == FIO_Q_COMPLETED) { 331 if (ddir_rw(io_u->ddir)) { 332 io_u_mark_depth(td, 1); 333 td->ts.total_io_u[io_u->ddir]++; 334 } 335 } else if (ret == FIO_Q_QUEUED) { 336 int r; 337 338 td->io_u_queued++; 339 340 if (ddir_rw(io_u->ddir)) 341 td->ts.total_io_u[io_u->ddir]++; 342 343 if (td->io_u_queued >= td->o.iodepth_batch) { 344 r = td_io_commit(td); 345 if (r < 0) 346 return r; 347 } 348 } 349 350 if (!td_ioengine_flagged(td, FIO_SYNCIO)) { 351 if (fio_fill_issue_time(td)) 352 fio_gettime(&io_u->issue_time, NULL); 353 354 /* 355 * only used for iolog 356 */ 357 if (td->o.read_iolog_file) 358 memcpy(&td->last_issue, &io_u->issue_time, 359 sizeof(struct timeval)); 360 } 361 362 return ret; 363 } 364 365 int td_io_init(struct thread_data *td) 366 { 367 int ret = 0; 368 369 if (td->io_ops->init) { 370 ret = td->io_ops->init(td); 371 if (ret) 372 log_err("fio: io engine %s init failed.%s\n", 373 td->io_ops->name, 374 td->o.iodepth > 1 ? 375 " Perhaps try reducing io depth?" : ""); 376 else 377 td->io_ops_init = 1; 378 if (!td->error) 379 td->error = ret; 380 } 381 382 return ret; 383 } 384 385 int td_io_commit(struct thread_data *td) 386 { 387 int ret; 388 389 dprint(FD_IO, "calling ->commit(), depth %d\n", td->cur_depth); 390 391 if (!td->cur_depth || !td->io_u_queued) 392 return 0; 393 394 io_u_mark_depth(td, td->io_u_queued); 395 396 if (td->io_ops->commit) { 397 ret = td->io_ops->commit(td); 398 if (ret) 399 td_verror(td, -ret, "io commit"); 400 } 401 402 /* 403 * Reflect that events were submitted as async IO requests. 404 */ 405 td->io_u_in_flight += td->io_u_queued; 406 td->io_u_queued = 0; 407 408 return 0; 409 } 410 411 int td_io_open_file(struct thread_data *td, struct fio_file *f) 412 { 413 assert(!fio_file_open(f)); 414 assert(f->fd == -1); 415 416 if (td->io_ops->open_file(td, f)) { 417 if (td->error == EINVAL && td->o.odirect) 418 log_err("fio: destination does not support O_DIRECT\n"); 419 if (td->error == EMFILE) { 420 log_err("fio: try reducing/setting openfiles (failed" 421 " at %u of %u)\n", td->nr_open_files, 422 td->o.nr_files); 423 } 424 425 assert(f->fd == -1); 426 assert(!fio_file_open(f)); 427 return 1; 428 } 429 430 fio_file_reset(td, f); 431 fio_file_set_open(f); 432 fio_file_clear_closing(f); 433 disk_util_inc(f->du); 434 435 td->nr_open_files++; 436 get_file(f); 437 438 if (f->filetype == FIO_TYPE_PIPE) { 439 if (td_random(td)) { 440 log_err("fio: can't seek on pipes (no random io)\n"); 441 goto err; 442 } 443 } 444 445 if (td_ioengine_flagged(td, FIO_DISKLESSIO)) 446 goto done; 447 448 if (td->o.invalidate_cache && file_invalidate_cache(td, f)) 449 goto err; 450 451 if (td->o.fadvise_hint != F_ADV_NONE && 452 (f->filetype == FIO_TYPE_BLOCK || f->filetype == FIO_TYPE_FILE)) { 453 int flags; 454 455 if (td->o.fadvise_hint == F_ADV_TYPE) { 456 if (td_random(td)) 457 flags = POSIX_FADV_RANDOM; 458 else 459 flags = POSIX_FADV_SEQUENTIAL; 460 } else if (td->o.fadvise_hint == F_ADV_RANDOM) 461 flags = POSIX_FADV_RANDOM; 462 else if (td->o.fadvise_hint == F_ADV_SEQUENTIAL) 463 flags = POSIX_FADV_SEQUENTIAL; 464 else { 465 log_err("fio: unknown fadvise type %d\n", 466 td->o.fadvise_hint); 467 flags = POSIX_FADV_NORMAL; 468 } 469 470 if (posix_fadvise(f->fd, f->file_offset, f->io_size, flags) < 0) { 471 td_verror(td, errno, "fadvise"); 472 goto err; 473 } 474 } 475 #ifdef FIO_HAVE_STREAMID 476 if (td->o.fadvise_stream && 477 (f->filetype == FIO_TYPE_BLOCK || f->filetype == FIO_TYPE_FILE)) { 478 off_t stream = td->o.fadvise_stream; 479 480 if (posix_fadvise(f->fd, stream, f->io_size, POSIX_FADV_STREAMID) < 0) { 481 td_verror(td, errno, "fadvise streamid"); 482 goto err; 483 } 484 } 485 #endif 486 487 #ifdef FIO_OS_DIRECTIO 488 /* 489 * Some OS's have a distinct call to mark the file non-buffered, 490 * instead of using O_DIRECT (Solaris) 491 */ 492 if (td->o.odirect) { 493 int ret = fio_set_odirect(f->fd); 494 495 if (ret) { 496 td_verror(td, ret, "fio_set_odirect"); 497 if (ret == ENOTTY) { /* ENOTTY suggests RAW device or ZFS */ 498 log_err("fio: doing directIO to RAW devices or ZFS not supported\n"); 499 } else { 500 log_err("fio: the file system does not seem to support direct IO\n"); 501 } 502 503 goto err; 504 } 505 } 506 #endif 507 508 done: 509 log_file(td, f, FIO_LOG_OPEN_FILE); 510 return 0; 511 err: 512 disk_util_dec(f->du); 513 if (td->io_ops->close_file) 514 td->io_ops->close_file(td, f); 515 return 1; 516 } 517 518 int td_io_close_file(struct thread_data *td, struct fio_file *f) 519 { 520 if (!fio_file_closing(f)) 521 log_file(td, f, FIO_LOG_CLOSE_FILE); 522 523 /* 524 * mark as closing, do real close when last io on it has completed 525 */ 526 fio_file_set_closing(f); 527 528 disk_util_dec(f->du); 529 530 if (td->o.file_lock_mode != FILE_LOCK_NONE) 531 unlock_file_all(td, f); 532 533 return put_file(td, f); 534 } 535 536 int td_io_unlink_file(struct thread_data *td, struct fio_file *f) 537 { 538 if (td->io_ops->unlink_file) 539 return td->io_ops->unlink_file(td, f); 540 else { 541 int ret; 542 543 ret = unlink(f->file_name); 544 if (ret < 0) 545 return errno; 546 547 return 0; 548 } 549 } 550 551 int td_io_get_file_size(struct thread_data *td, struct fio_file *f) 552 { 553 if (!td->io_ops->get_file_size) 554 return 0; 555 556 return td->io_ops->get_file_size(td, f); 557 } 558 559 int fio_show_ioengine_help(const char *engine) 560 { 561 struct flist_head *entry; 562 struct thread_data td; 563 struct ioengine_ops *io_ops; 564 char *sep; 565 int ret = 1; 566 567 if (!engine || !*engine) { 568 log_info("Available IO engines:\n"); 569 flist_for_each(entry, &engine_list) { 570 io_ops = flist_entry(entry, struct ioengine_ops, list); 571 log_info("\t%s\n", io_ops->name); 572 } 573 return 0; 574 } 575 sep = strchr(engine, ','); 576 if (sep) { 577 *sep = 0; 578 sep++; 579 } 580 581 memset(&td, 0, sizeof(td)); 582 583 io_ops = load_ioengine(&td, engine); 584 if (!io_ops) { 585 log_info("IO engine %s not found\n", engine); 586 return 1; 587 } 588 589 if (io_ops->options) 590 ret = show_cmd_help(io_ops->options, sep); 591 else 592 log_info("IO engine %s has no options\n", io_ops->name); 593 594 free_ioengine(&td); 595 596 return ret; 597 } 598