1 /* 2 * version of copy command using async i/o 3 * From: Stephen Hemminger <shemminger (at) osdl.org> 4 * Modified by Daniel McNeil <daniel (at) osdl.org> for testing aio. 5 * - added -a alignment 6 * - added -b blksize option 7 * _ added -s size option 8 * - added -f open_flag option 9 * - added -w (no write) option (reads from source only) 10 * - added -n (num aio) option 11 * - added -z (zero dest) opton (writes zeros to dest only) 12 * - added -D delay_ms option 13 * 14 * Copy file by using a async I/O state machine. 15 * 1. Start read request 16 * 2. When read completes turn it into a write request 17 * 3. When write completes decrement counter and free resources 18 * 19 * 20 * Usage: aiocp [-b blksize] -n [num_aio] [-w] [-z] [-s filesize] 21 * [-f DIRECT|TRUNC|CREAT|SYNC|LARGEFILE] src dest 22 */ 23 24 #define _GNU_SOURCE 25 26 #include <unistd.h> 27 #include <stdio.h> 28 #include <sys/types.h> 29 #include <sys/stat.h> 30 #include <sys/param.h> 31 #include <fcntl.h> 32 #include <errno.h> 33 #include <stdlib.h> 34 #include <mntent.h> 35 #include <sys/select.h> 36 #include <sys/mount.h> 37 #include "config.h" 38 39 #if HAVE_LIBAIO_H 40 41 #include <libaio.h> 42 43 #define AIO_BLKSIZE (64*1024) 44 #define AIO_MAXIO 32 45 46 static int aio_blksize = AIO_BLKSIZE; 47 static int aio_maxio = AIO_MAXIO; 48 49 static int busy = 0; // # of I/O's in flight 50 static int tocopy = 0; // # of blocks left to copy 51 static int srcfd; // source fd 52 static int srcfd2; // source fd - end of file non-sector 53 static int dstfd = -1; // destination file descriptor 54 static int dstfd2 = -1; // Handle end of file for non-sector size 55 static const char *dstname = NULL; 56 static const char *srcname = NULL; 57 static int source_open_flag = O_RDONLY; /* open flags on source file */ 58 static int dest_open_flag = O_WRONLY; /* open flags on dest file */ 59 static int no_write; /* do not write */ 60 static int zero; /* write zero's only */ 61 62 static int debug; 63 static int count_io_q_waits; /* how many time io_queue_wait called */ 64 65 struct iocb **iocb_free; /* array of pointers to iocb */ 66 int iocb_free_count; /* current free count */ 67 int alignment = 512; /* buffer alignment */ 68 69 struct timeval delay; /* delay between i/o */ 70 71 static int dev_block_size_by_path(const char *path) 72 { 73 FILE *f; 74 struct mntent *mnt; 75 size_t prefix_len, prefix_max = 0; 76 char dev_name[1024]; 77 int fd, size; 78 79 if (!path) 80 return 0; 81 82 f = setmntent("/proc/mounts", "r"); 83 if (!f) { 84 fprintf(stderr, "Failed to open /proc/mounts\n"); 85 return 0; 86 } 87 88 while ((mnt = getmntent(f))) { 89 /* Skip pseudo fs */ 90 if (mnt->mnt_fsname[0] != '/') 91 continue; 92 93 prefix_len = strlen(mnt->mnt_dir); 94 95 if (prefix_len > prefix_max && 96 !strncmp(path, mnt->mnt_dir, prefix_len)) { 97 prefix_max = prefix_len; 98 strncpy(dev_name, mnt->mnt_fsname, sizeof(dev_name)); 99 dev_name[sizeof(dev_name)-1] = '\0'; 100 } 101 } 102 103 endmntent(f); 104 105 if (!prefix_max) { 106 fprintf(stderr, "Path '%s' not found in /proc/mounts\n", path); 107 return 0; 108 } 109 110 printf("Path '%s' is on device '%s'\n", path, dev_name); 111 112 fd = open(dev_name, O_RDONLY); 113 if (!fd) { 114 fprintf(stderr, "open('%s'): %s\n", dev_name, strerror(errno)); 115 return 0; 116 } 117 118 if (ioctl(fd, BLKSSZGET, &size)) { 119 fprintf(stderr, "ioctl(BLKSSZGET): %s\n", strerror(errno)); 120 close(fd); 121 return 0; 122 } 123 124 close(fd); 125 printf("'%s' has block size %i\n", dev_name, size); 126 127 return size; 128 } 129 130 int init_iocb(int n, int iosize) 131 { 132 void *buf; 133 int i; 134 135 if ((iocb_free = malloc(n * sizeof(struct iocb *))) == 0) { 136 return -1; 137 } 138 139 for (i = 0; i < n; i++) { 140 if (! 141 (iocb_free[i] = malloc(sizeof(struct iocb)))) 142 return -1; 143 if (posix_memalign(&buf, alignment, iosize)) 144 return -1; 145 if (debug > 1) { 146 printf("buf allocated at 0x%p, align:%d\n", 147 buf, alignment); 148 } 149 if (zero) { 150 /* 151 * We are writing zero's to dstfd 152 */ 153 memset(buf, 0, iosize); 154 } 155 io_prep_pread(iocb_free[i], -1, buf, iosize, 0); 156 } 157 iocb_free_count = i; 158 return 0; 159 } 160 161 static struct iocb *alloc_iocb(void) 162 { 163 if (!iocb_free_count) 164 return 0; 165 return iocb_free[--iocb_free_count]; 166 } 167 168 void free_iocb(struct iocb *io) 169 { 170 iocb_free[iocb_free_count++] = io; 171 } 172 173 /* 174 * io_wait_run() - wait for an io_event and then call the callback. 175 */ 176 int io_wait_run(io_context_t ctx, struct timespec *to) 177 { 178 struct io_event events[aio_maxio]; 179 struct io_event *ep; 180 int ret, n; 181 182 /* 183 * get up to aio_maxio events at a time. 184 */ 185 ret = n = io_getevents(ctx, 1, aio_maxio, events, to); 186 187 /* 188 * Call the callback functions for each event. 189 */ 190 for (ep = events; n-- > 0; ep++) { 191 io_callback_t cb = (io_callback_t) ep->data; 192 struct iocb *iocb = ep->obj; 193 194 if (debug > 1) { 195 fprintf(stderr, "ev:%p iocb:%p res:%ld res2:%ld\n", 196 ep, iocb, ep->res, ep->res2); 197 } 198 cb(ctx, iocb, ep->res, ep->res2); 199 } 200 return ret; 201 } 202 203 /* Fatal error handler */ 204 static void io_error(const char *func, int rc) 205 { 206 if (rc == -ENOSYS) 207 fprintf(stderr, "AIO not in this kernel\n"); 208 else if (rc < 0) 209 fprintf(stderr, "%s: %s\n", func, strerror(-rc)); 210 else 211 fprintf(stderr, "%s: error %d\n", func, rc); 212 213 if (dstfd > 0) 214 close(dstfd); 215 if (dstname && dest_open_flag & O_CREAT) 216 unlink(dstname); 217 exit(1); 218 } 219 220 /* 221 * Write complete callback. 222 * Adjust counts and free resources 223 */ 224 static void wr_done(io_context_t ctx, struct iocb *iocb, long res, long res2) 225 { 226 if (res2 != 0) { 227 io_error("aio write", res2); 228 } 229 if (res != iocb->u.c.nbytes) { 230 fprintf(stderr, "write missed bytes expect %lu got %ld\n", 231 iocb->u.c.nbytes, res); 232 exit(1); 233 } 234 --tocopy; 235 --busy; 236 free_iocb(iocb); 237 if (debug) 238 write(2, "w", 1); 239 } 240 241 /* 242 * Read complete callback. 243 * Change read iocb into a write iocb and start it. 244 */ 245 static void rd_done(io_context_t ctx, struct iocb *iocb, long res, long res2) 246 { 247 /* library needs accessors to look at iocb? */ 248 int iosize = iocb->u.c.nbytes; 249 char *buf = iocb->u.c.buf; 250 off_t offset = iocb->u.c.offset; 251 252 if (res2 != 0) 253 io_error("aio read", res2); 254 if (res != iosize) { 255 fprintf(stderr, "read missing bytes expect %lu got %ld\n", 256 iocb->u.c.nbytes, res); 257 exit(1); 258 } 259 260 /* turn read into write */ 261 if (no_write) { 262 --tocopy; 263 --busy; 264 free_iocb(iocb); 265 } else { 266 int fd; 267 if (iocb->aio_fildes == srcfd) 268 fd = dstfd; 269 else 270 fd = dstfd2; 271 io_prep_pwrite(iocb, fd, buf, iosize, offset); 272 io_set_callback(iocb, wr_done); 273 if (1 != (res = io_submit(ctx, 1, &iocb))) 274 io_error("io_submit write", res); 275 } 276 if (debug) 277 write(2, "r", 1); 278 if (debug > 1) 279 printf("%d", iosize); 280 } 281 282 static void usage(void) 283 { 284 fprintf(stderr, 285 "Usage: aiocp [-a align] [-s size] [-b blksize] [-n num_io]" 286 " [-f open_flag] SOURCE DEST\n" 287 "This copies from SOURCE to DEST using AIO.\n\n" 288 "Usage: aiocp [options] -w SOURCE\n" 289 "This does sequential AIO reads (no writes).\n\n" 290 "Usage: aiocp [options] -z DEST\n" 291 "This does sequential AIO writes of zeros.\n"); 292 293 exit(1); 294 } 295 296 /* 297 * Scale value by kilo, mega, or giga. 298 */ 299 long long scale_by_kmg(long long value, char scale) 300 { 301 switch (scale) { 302 case 'g': 303 case 'G': 304 value *= 1024; 305 case 'm': 306 case 'M': 307 value *= 1024; 308 case 'k': 309 case 'K': 310 value *= 1024; 311 break; 312 case '\0': 313 break; 314 default: 315 usage(); 316 break; 317 } 318 return value; 319 } 320 321 int main(int argc, char *const *argv) 322 { 323 struct stat st; 324 off_t length = 0, offset = 0; 325 off_t leftover = 0; 326 io_context_t myctx; 327 int c; 328 extern char *optarg; 329 extern int optind, opterr, optopt; 330 331 while ((c = getopt(argc, argv, "a:b:df:n:s:wzD:")) != -1) { 332 char *endp; 333 334 switch (c) { 335 case 'a': /* alignment of data buffer */ 336 alignment = strtol(optarg, &endp, 0); 337 alignment = (long)scale_by_kmg((long long)alignment, 338 *endp); 339 break; 340 case 'f': /* use these open flags */ 341 if (strcmp(optarg, "LARGEFILE") == 0 || 342 strcmp(optarg, "O_LARGEFILE") == 0) { 343 source_open_flag |= O_LARGEFILE; 344 dest_open_flag |= O_LARGEFILE; 345 } else if (strcmp(optarg, "TRUNC") == 0 || 346 strcmp(optarg, "O_TRUNC") == 0) { 347 dest_open_flag |= O_TRUNC; 348 } else if (strcmp(optarg, "SYNC") == 0 || 349 strcmp(optarg, "O_SYNC") == 0) { 350 dest_open_flag |= O_SYNC; 351 } else if (strcmp(optarg, "DIRECT") == 0 || 352 strcmp(optarg, "O_DIRECT") == 0) { 353 source_open_flag |= O_DIRECT; 354 dest_open_flag |= O_DIRECT; 355 } else if (strncmp(optarg, "CREAT", 5) == 0 || 356 strncmp(optarg, "O_CREAT", 5) == 0) { 357 dest_open_flag |= O_CREAT; 358 } 359 break; 360 case 'd': 361 debug++; 362 break; 363 case 'D': 364 delay.tv_usec = atoi(optarg); 365 break; 366 case 'b': /* block size */ 367 aio_blksize = strtol(optarg, &endp, 0); 368 aio_blksize = 369 (long)scale_by_kmg((long long)aio_blksize, *endp); 370 break; 371 372 case 'n': /* num io */ 373 aio_maxio = strtol(optarg, &endp, 0); 374 break; 375 case 's': /* size to transfer */ 376 length = strtoll(optarg, &endp, 0); 377 length = scale_by_kmg(length, *endp); 378 break; 379 case 'w': /* no write */ 380 no_write = 1; 381 break; 382 case 'z': /* write zero's */ 383 zero = 1; 384 break; 385 386 default: 387 usage(); 388 } 389 } 390 391 argc -= optind; 392 argv += optind; 393 394 if (argc < 1) { 395 usage(); 396 } 397 if (!zero) { 398 if ((srcfd = open(srcname = *argv, source_open_flag)) < 0) { 399 perror(srcname); 400 exit(1); 401 } 402 argv++; 403 argc--; 404 if (fstat(srcfd, &st) < 0) { 405 perror("fstat"); 406 exit(1); 407 } 408 if (length == 0) 409 length = st.st_size; 410 } 411 412 if (!no_write) { 413 /* 414 * We are either copying or writing zeros to dstname 415 */ 416 if (argc < 1) { 417 usage(); 418 } 419 if ((dstfd = open(dstname = *argv, dest_open_flag, 0666)) < 0) { 420 perror(dstname); 421 exit(1); 422 } 423 if (zero) { 424 /* 425 * get size of dest, if we are zeroing it. 426 * TODO: handle devices. 427 */ 428 if (fstat(dstfd, &st) < 0) { 429 perror("fstat"); 430 exit(1); 431 } 432 if (length == 0) 433 length = st.st_size; 434 } 435 } 436 /* 437 * O_DIRECT cannot handle non-sector sizes 438 */ 439 if (dest_open_flag & O_DIRECT) { 440 int src_alignment = dev_block_size_by_path(srcname); 441 int dst_alignment = dev_block_size_by_path(dstname); 442 443 /* 444 * Given we expect the block sizes to be multiple of 2 the 445 * larger is always divideable by the smaller, so we only need 446 * to care about maximum. 447 */ 448 if (src_alignment > dst_alignment) 449 dst_alignment = src_alignment; 450 451 if (alignment < dst_alignment) { 452 alignment = dst_alignment; 453 printf("Forcing aligment to %i\n", alignment); 454 } 455 456 if (aio_blksize % alignment) { 457 printf("Block size is not multiple of drive block size\n"); 458 printf("Skipping the test!\n"); 459 exit(0); 460 } 461 462 leftover = length % alignment; 463 if (leftover) { 464 int flag; 465 466 length -= leftover; 467 if (!zero) { 468 flag = source_open_flag & ~O_DIRECT; 469 srcfd2 = open(srcname, flag); 470 if (srcfd2 < 0) { 471 perror(srcname); 472 exit(1); 473 } 474 } 475 if (!no_write) { 476 flag = (O_SYNC | dest_open_flag) & 477 ~(O_DIRECT | O_CREAT); 478 dstfd2 = open(dstname, flag); 479 if (dstfd2 < 0) { 480 perror(dstname); 481 exit(1); 482 } 483 } 484 } 485 } 486 487 /* initialize state machine */ 488 memset(&myctx, 0, sizeof(myctx)); 489 io_queue_init(aio_maxio, &myctx); 490 tocopy = howmany(length, aio_blksize); 491 492 if (init_iocb(aio_maxio, aio_blksize) < 0) { 493 fprintf(stderr, "Error allocating the i/o buffers\n"); 494 exit(1); 495 } 496 497 while (tocopy > 0) { 498 int i, rc; 499 /* Submit as many reads as once as possible upto aio_maxio */ 500 int n = MIN(MIN(aio_maxio - busy, aio_maxio), 501 howmany(length - offset, aio_blksize)); 502 if (n > 0) { 503 struct iocb *ioq[n]; 504 505 for (i = 0; i < n; i++) { 506 struct iocb *io = alloc_iocb(); 507 int iosize = MIN(length - offset, aio_blksize); 508 509 if (zero) { 510 /* 511 * We are writing zero's to dstfd 512 */ 513 io_prep_pwrite(io, dstfd, io->u.c.buf, 514 iosize, offset); 515 io_set_callback(io, wr_done); 516 } else { 517 io_prep_pread(io, srcfd, io->u.c.buf, 518 iosize, offset); 519 io_set_callback(io, rd_done); 520 } 521 ioq[i] = io; 522 offset += iosize; 523 } 524 525 rc = io_submit(myctx, n, ioq); 526 if (rc < 0) 527 io_error("io_submit", rc); 528 529 busy += n; 530 if (debug > 1) 531 printf("io_submit(%d) busy:%d\n", n, busy); 532 if (delay.tv_usec) { 533 struct timeval t = delay; 534 (void)select(0, 0, 0, 0, &t); 535 } 536 } 537 538 /* 539 * We have submitted all the i/o requests. Wait for at least one to complete 540 * and call the callbacks. 541 */ 542 count_io_q_waits++; 543 rc = io_wait_run(myctx, 0); 544 if (rc < 0) 545 io_error("io_wait_run", rc); 546 547 if (debug > 1) { 548 printf("io_wait_run: rc == %d\n", rc); 549 printf("busy:%d aio_maxio:%d tocopy:%d\n", 550 busy, aio_maxio, tocopy); 551 } 552 } 553 554 if (leftover) { 555 /* non-sector size end of file */ 556 struct iocb *io = alloc_iocb(); 557 int rc; 558 if (zero) { 559 /* 560 * We are writing zero's to dstfd2 561 */ 562 io_prep_pwrite(io, dstfd2, io->u.c.buf, 563 leftover, offset); 564 io_set_callback(io, wr_done); 565 } else { 566 io_prep_pread(io, srcfd2, io->u.c.buf, 567 leftover, offset); 568 io_set_callback(io, rd_done); 569 } 570 rc = io_submit(myctx, 1, &io); 571 if (rc < 0) 572 io_error("io_submit", rc); 573 count_io_q_waits++; 574 rc = io_wait_run(myctx, 0); 575 if (rc < 0) 576 io_error("io_wait_run", rc); 577 } 578 579 if (srcfd != -1) 580 close(srcfd); 581 if (dstfd != -1) 582 close(dstfd); 583 exit(0); 584 } 585 586 /* 587 * Results look like: 588 * [alanm@toolbox ~/MOT3]$ ../taio -d kernel-source-2.4.8-0.4g.ppc.rpm abc 589 * rrrrrrrrrrrrrrrwwwrwrrwwrrwrwwrrwrwrwwrrwrwrrrrwwrwwwrrwrrrwwwwwwwwwwwwwwwww 590 * rrrrrrrrrrrrrrwwwrrwrwrwrwrrwwwwwwwwwwwwwwrrrrrrrrrrrrrrrrrrwwwwrwrwwrwrwrwr 591 * wrrrrrrrwwwwwwwwwwwwwrrrwrrrwrrwrwwwwwwwwwwrrrrwwrwrrrrrrrrrrrwwwwwwwwwwwrww 592 * wwwrrrrrrrrwwrrrwwrwrwrwwwrrrrrrrwwwrrwwwrrwrwwwwwwwwrrrrrrrwwwrrrrrrrwwwwww 593 * wwwwwwwrwrrrrrrrrwrrwrrwrrwrwrrrwrrrwrrrwrwwwwwwwwwwwwwwwwwwrrrwwwrrrrrrrrrr 594 * rrwrrrrrrwrrwwwwwwwwwwwwwwwwrwwwrrwrwwrrrrrrrrrrrrrrrrrrrwwwwwwwwwwwwwwwwwww 595 * rrrrrwrrwrwrwrrwrrrwwwwwwwwrrrrwrrrwrwwrwrrrwrrwrrrrwwwwwwwrwrwwwwrwwrrrwrrr 596 * rrrwwwwwwwrrrrwwrrrrrrrrrrrrwrwrrrrwwwwwwwwwwwwwwrwrrrrwwwwrwrrrrwrwwwrrrwww 597 * rwwrrrrrrrwrrrrrrrrrrrrwwwwrrrwwwrwrrwwwwwwwwwwwwwwwwwwwwwrrrrrrrwwwwwwwrw 598 */ 599 600 #else 601 602 int main(void) 603 { 604 fprintf(stderr, "System doesn't have libaio support.\n"); 605 return 1; 606 } 607 608 #endif 609