1 /* 2 * version of copy command using async i/o 3 * From: Stephen Hemminger <shemminger (at) osdl.org> 4 * Modified by Daniel McNeil <daniel (at) osdl.org> for testing aio. 5 * - added -a alignment 6 * - added -b blksize option 7 * _ added -s size option 8 * - added -f open_flag option 9 * - added -w (no write) option (reads from source only) 10 * - added -n (num aio) option 11 * - added -z (zero dest) opton (writes zeros to dest only) 12 * - added -D delay_ms option 13 * 14 * Copy file by using a async I/O state machine. 15 * 1. Start read request 16 * 2. When read completes turn it into a write request 17 * 3. When write completes decrement counter and free resources 18 * 19 * 20 * Usage: aiocp [-b blksize] -n [num_aio] [-w] [-z] [-s filesize] 21 * [-f DIRECT|TRUNC|CREAT|SYNC|LARGEFILE] src dest 22 */ 23 24 #define _GNU_SOURCE 25 26 #include <unistd.h> 27 #include <stdio.h> 28 #include <sys/types.h> 29 #include <sys/stat.h> 30 #include <sys/param.h> 31 #include <fcntl.h> 32 #include <errno.h> 33 #include <stdlib.h> 34 #include <mntent.h> 35 #include <sys/select.h> 36 #include <sys/mount.h> 37 38 #include "config.h" 39 #include "tst_res_flags.h" 40 41 #ifdef HAVE_LIBAIO 42 #include <libaio.h> 43 44 #define AIO_BLKSIZE (64*1024) 45 #define AIO_MAXIO 32 46 47 static int aio_blksize = AIO_BLKSIZE; 48 static int aio_maxio = AIO_MAXIO; 49 50 static int busy = 0; // # of I/O's in flight 51 static int tocopy = 0; // # of blocks left to copy 52 static int srcfd; // source fd 53 static int srcfd2; // source fd - end of file non-sector 54 static int dstfd = -1; // destination file descriptor 55 static int dstfd2 = -1; // Handle end of file for non-sector size 56 static const char *dstname = NULL; 57 static const char *srcname = NULL; 58 static int source_open_flag = O_RDONLY; /* open flags on source file */ 59 static int dest_open_flag = O_WRONLY; /* open flags on dest file */ 60 static int no_write; /* do not write */ 61 static int zero; /* write zero's only */ 62 63 static int debug; 64 static int count_io_q_waits; /* how many time io_queue_wait called */ 65 66 struct iocb **iocb_free; /* array of pointers to iocb */ 67 int iocb_free_count; /* current free count */ 68 int alignment = 512; /* buffer alignment */ 69 70 struct timeval delay; /* delay between i/o */ 71 72 static int dev_block_size_by_path(const char *path) 73 { 74 FILE *f; 75 struct mntent *mnt; 76 size_t prefix_len, prefix_max = 0; 77 char dev_name[1024]; 78 int fd, size; 79 80 if (!path) 81 return 0; 82 83 f = setmntent("/proc/mounts", "r"); 84 if (!f) { 85 fprintf(stderr, "Failed to open /proc/mounts\n"); 86 return 0; 87 } 88 89 while ((mnt = getmntent(f))) { 90 /* Skip pseudo fs */ 91 if (mnt->mnt_fsname[0] != '/') 92 continue; 93 94 prefix_len = strlen(mnt->mnt_dir); 95 96 if (prefix_len > prefix_max && 97 !strncmp(path, mnt->mnt_dir, prefix_len)) { 98 prefix_max = prefix_len; 99 strncpy(dev_name, mnt->mnt_fsname, sizeof(dev_name)); 100 dev_name[sizeof(dev_name)-1] = '\0'; 101 } 102 } 103 104 endmntent(f); 105 106 if (!prefix_max) { 107 fprintf(stderr, "Path '%s' not found in /proc/mounts\n", path); 108 return 0; 109 } 110 111 printf("Path '%s' is on device '%s'\n", path, dev_name); 112 113 fd = open(dev_name, O_RDONLY); 114 if (!fd) { 115 fprintf(stderr, "open('%s'): %s\n", dev_name, strerror(errno)); 116 return 0; 117 } 118 119 if (ioctl(fd, BLKSSZGET, &size)) { 120 fprintf(stderr, "ioctl(BLKSSZGET): %s\n", strerror(errno)); 121 close(fd); 122 return 0; 123 } 124 125 close(fd); 126 printf("'%s' has block size %i\n", dev_name, size); 127 128 return size; 129 } 130 131 int init_iocb(int n, int iosize) 132 { 133 void *buf; 134 int i; 135 136 if ((iocb_free = malloc(n * sizeof(struct iocb *))) == 0) { 137 return -1; 138 } 139 140 for (i = 0; i < n; i++) { 141 if (! 142 (iocb_free[i] = malloc(sizeof(struct iocb)))) 143 return -1; 144 if (posix_memalign(&buf, alignment, iosize)) 145 return -1; 146 if (debug > 1) { 147 printf("buf allocated at 0x%p, align:%d\n", 148 buf, alignment); 149 } 150 if (zero) { 151 /* 152 * We are writing zero's to dstfd 153 */ 154 memset(buf, 0, iosize); 155 } 156 io_prep_pread(iocb_free[i], -1, buf, iosize, 0); 157 } 158 iocb_free_count = i; 159 return 0; 160 } 161 162 static struct iocb *alloc_iocb(void) 163 { 164 if (!iocb_free_count) 165 return 0; 166 return iocb_free[--iocb_free_count]; 167 } 168 169 void free_iocb(struct iocb *io) 170 { 171 iocb_free[iocb_free_count++] = io; 172 } 173 174 /* 175 * io_wait_run() - wait for an io_event and then call the callback. 176 */ 177 int io_wait_run(io_context_t ctx, struct timespec *to) 178 { 179 struct io_event events[aio_maxio]; 180 struct io_event *ep; 181 int ret, n; 182 183 /* 184 * get up to aio_maxio events at a time. 185 */ 186 ret = n = io_getevents(ctx, 1, aio_maxio, events, to); 187 188 /* 189 * Call the callback functions for each event. 190 */ 191 for (ep = events; n-- > 0; ep++) { 192 io_callback_t cb = (io_callback_t) ep->data; 193 struct iocb *iocb = ep->obj; 194 195 if (debug > 1) { 196 fprintf(stderr, "ev:%p iocb:%p res:%ld res2:%ld\n", 197 ep, iocb, ep->res, ep->res2); 198 } 199 cb(ctx, iocb, ep->res, ep->res2); 200 } 201 return ret; 202 } 203 204 /* Fatal error handler */ 205 static void io_error(const char *func, int rc) 206 { 207 if (rc == -ENOSYS) 208 fprintf(stderr, "AIO not in this kernel\n"); 209 else if (rc < 0) 210 fprintf(stderr, "%s: %s\n", func, strerror(-rc)); 211 else 212 fprintf(stderr, "%s: error %d\n", func, rc); 213 214 if (dstfd > 0) 215 close(dstfd); 216 if (dstname && dest_open_flag & O_CREAT) 217 unlink(dstname); 218 exit(1); 219 } 220 221 /* 222 * Write complete callback. 223 * Adjust counts and free resources 224 */ 225 static void wr_done(io_context_t ctx, struct iocb *iocb, long res, long res2) 226 { 227 if (res2 != 0) { 228 io_error("aio write", res2); 229 } 230 if (res != iocb->u.c.nbytes) { 231 fprintf(stderr, "write missed bytes expect %lu got %ld\n", 232 iocb->u.c.nbytes, res); 233 exit(1); 234 } 235 --tocopy; 236 --busy; 237 free_iocb(iocb); 238 if (debug) 239 write(2, "w", 1); 240 } 241 242 /* 243 * Read complete callback. 244 * Change read iocb into a write iocb and start it. 245 */ 246 static void rd_done(io_context_t ctx, struct iocb *iocb, long res, long res2) 247 { 248 /* library needs accessors to look at iocb? */ 249 int iosize = iocb->u.c.nbytes; 250 char *buf = iocb->u.c.buf; 251 off_t offset = iocb->u.c.offset; 252 253 if (res2 != 0) 254 io_error("aio read", res2); 255 if (res != iosize) { 256 fprintf(stderr, "read missing bytes expect %lu got %ld\n", 257 iocb->u.c.nbytes, res); 258 exit(1); 259 } 260 261 /* turn read into write */ 262 if (no_write) { 263 --tocopy; 264 --busy; 265 free_iocb(iocb); 266 } else { 267 int fd; 268 if (iocb->aio_fildes == srcfd) 269 fd = dstfd; 270 else 271 fd = dstfd2; 272 io_prep_pwrite(iocb, fd, buf, iosize, offset); 273 io_set_callback(iocb, wr_done); 274 if (1 != (res = io_submit(ctx, 1, &iocb))) 275 io_error("io_submit write", res); 276 } 277 if (debug) 278 write(2, "r", 1); 279 if (debug > 1) 280 printf("%d", iosize); 281 } 282 283 static void usage(void) 284 { 285 fprintf(stderr, 286 "Usage: aiocp [-a align] [-s size] [-b blksize] [-n num_io]" 287 " [-f open_flag] SOURCE DEST\n" 288 "This copies from SOURCE to DEST using AIO.\n\n" 289 "Usage: aiocp [options] -w SOURCE\n" 290 "This does sequential AIO reads (no writes).\n\n" 291 "Usage: aiocp [options] -z DEST\n" 292 "This does sequential AIO writes of zeros.\n"); 293 294 exit(1); 295 } 296 297 /* 298 * Scale value by kilo, mega, or giga. 299 */ 300 long long scale_by_kmg(long long value, char scale) 301 { 302 switch (scale) { 303 case 'g': 304 case 'G': 305 value *= 1024; 306 case 'm': 307 case 'M': 308 value *= 1024; 309 case 'k': 310 case 'K': 311 value *= 1024; 312 break; 313 case '\0': 314 break; 315 default: 316 usage(); 317 break; 318 } 319 return value; 320 } 321 322 int main(int argc, char *const *argv) 323 { 324 struct stat st; 325 off_t length = 0, offset = 0; 326 off_t leftover = 0; 327 io_context_t myctx; 328 int c; 329 extern char *optarg; 330 extern int optind, opterr, optopt; 331 332 while ((c = getopt(argc, argv, "a:b:df:n:s:wzD:")) != -1) { 333 char *endp; 334 335 switch (c) { 336 case 'a': /* alignment of data buffer */ 337 alignment = strtol(optarg, &endp, 0); 338 alignment = (long)scale_by_kmg((long long)alignment, 339 *endp); 340 break; 341 case 'f': /* use these open flags */ 342 if (strcmp(optarg, "LARGEFILE") == 0 || 343 strcmp(optarg, "O_LARGEFILE") == 0) { 344 source_open_flag |= O_LARGEFILE; 345 dest_open_flag |= O_LARGEFILE; 346 } else if (strcmp(optarg, "TRUNC") == 0 || 347 strcmp(optarg, "O_TRUNC") == 0) { 348 dest_open_flag |= O_TRUNC; 349 } else if (strcmp(optarg, "SYNC") == 0 || 350 strcmp(optarg, "O_SYNC") == 0) { 351 dest_open_flag |= O_SYNC; 352 } else if (strcmp(optarg, "DIRECT") == 0 || 353 strcmp(optarg, "O_DIRECT") == 0) { 354 source_open_flag |= O_DIRECT; 355 dest_open_flag |= O_DIRECT; 356 } else if (strncmp(optarg, "CREAT", 5) == 0 || 357 strncmp(optarg, "O_CREAT", 5) == 0) { 358 dest_open_flag |= O_CREAT; 359 } 360 break; 361 case 'd': 362 debug++; 363 break; 364 case 'D': 365 delay.tv_usec = atoi(optarg); 366 break; 367 case 'b': /* block size */ 368 aio_blksize = strtol(optarg, &endp, 0); 369 aio_blksize = 370 (long)scale_by_kmg((long long)aio_blksize, *endp); 371 break; 372 373 case 'n': /* num io */ 374 aio_maxio = strtol(optarg, &endp, 0); 375 break; 376 case 's': /* size to transfer */ 377 length = strtoll(optarg, &endp, 0); 378 length = scale_by_kmg(length, *endp); 379 break; 380 case 'w': /* no write */ 381 no_write = 1; 382 break; 383 case 'z': /* write zero's */ 384 zero = 1; 385 break; 386 387 default: 388 usage(); 389 } 390 } 391 392 argc -= optind; 393 argv += optind; 394 395 if (argc < 1) { 396 usage(); 397 } 398 if (!zero) { 399 if ((srcfd = open(srcname = *argv, source_open_flag)) < 0) { 400 perror(srcname); 401 exit(1); 402 } 403 argv++; 404 argc--; 405 if (fstat(srcfd, &st) < 0) { 406 perror("fstat"); 407 exit(1); 408 } 409 if (length == 0) 410 length = st.st_size; 411 } 412 413 if (!no_write) { 414 /* 415 * We are either copying or writing zeros to dstname 416 */ 417 if (argc < 1) { 418 usage(); 419 } 420 if ((dstfd = open(dstname = *argv, dest_open_flag, 0666)) < 0) { 421 perror(dstname); 422 exit(1); 423 } 424 if (zero) { 425 /* 426 * get size of dest, if we are zeroing it. 427 * TODO: handle devices. 428 */ 429 if (fstat(dstfd, &st) < 0) { 430 perror("fstat"); 431 exit(1); 432 } 433 if (length == 0) 434 length = st.st_size; 435 } 436 } 437 /* 438 * O_DIRECT cannot handle non-sector sizes 439 */ 440 if (dest_open_flag & O_DIRECT) { 441 int src_alignment = dev_block_size_by_path(srcname); 442 int dst_alignment = dev_block_size_by_path(dstname); 443 444 /* 445 * Given we expect the block sizes to be multiple of 2 the 446 * larger is always divideable by the smaller, so we only need 447 * to care about maximum. 448 */ 449 if (src_alignment > dst_alignment) 450 dst_alignment = src_alignment; 451 452 if (alignment < dst_alignment) { 453 alignment = dst_alignment; 454 printf("Forcing aligment to %i\n", alignment); 455 } 456 457 if (aio_blksize % alignment) { 458 printf("Block size is not multiple of drive block size\n"); 459 printf("Skipping the test!\n"); 460 exit(0); 461 } 462 463 leftover = length % alignment; 464 if (leftover) { 465 int flag; 466 467 length -= leftover; 468 if (!zero) { 469 flag = source_open_flag & ~O_DIRECT; 470 srcfd2 = open(srcname, flag); 471 if (srcfd2 < 0) { 472 perror(srcname); 473 exit(1); 474 } 475 } 476 if (!no_write) { 477 flag = (O_SYNC | dest_open_flag) & 478 ~(O_DIRECT | O_CREAT); 479 dstfd2 = open(dstname, flag); 480 if (dstfd2 < 0) { 481 perror(dstname); 482 exit(1); 483 } 484 } 485 } 486 } 487 488 /* initialize state machine */ 489 memset(&myctx, 0, sizeof(myctx)); 490 io_queue_init(aio_maxio, &myctx); 491 tocopy = howmany(length, aio_blksize); 492 493 if (init_iocb(aio_maxio, aio_blksize) < 0) { 494 fprintf(stderr, "Error allocating the i/o buffers\n"); 495 exit(1); 496 } 497 498 while (tocopy > 0) { 499 int i, rc; 500 /* Submit as many reads as once as possible upto aio_maxio */ 501 int n = MIN(MIN(aio_maxio - busy, aio_maxio), 502 howmany(length - offset, aio_blksize)); 503 if (n > 0) { 504 struct iocb *ioq[n]; 505 506 for (i = 0; i < n; i++) { 507 struct iocb *io = alloc_iocb(); 508 int iosize = MIN(length - offset, aio_blksize); 509 510 if (zero) { 511 /* 512 * We are writing zero's to dstfd 513 */ 514 io_prep_pwrite(io, dstfd, io->u.c.buf, 515 iosize, offset); 516 io_set_callback(io, wr_done); 517 } else { 518 io_prep_pread(io, srcfd, io->u.c.buf, 519 iosize, offset); 520 io_set_callback(io, rd_done); 521 } 522 ioq[i] = io; 523 offset += iosize; 524 } 525 526 rc = io_submit(myctx, n, ioq); 527 if (rc < 0) 528 io_error("io_submit", rc); 529 530 busy += n; 531 if (debug > 1) 532 printf("io_submit(%d) busy:%d\n", n, busy); 533 if (delay.tv_usec) { 534 struct timeval t = delay; 535 (void)select(0, 0, 0, 0, &t); 536 } 537 } 538 539 /* 540 * We have submitted all the i/o requests. Wait for at least one to complete 541 * and call the callbacks. 542 */ 543 count_io_q_waits++; 544 rc = io_wait_run(myctx, 0); 545 if (rc < 0) 546 io_error("io_wait_run", rc); 547 548 if (debug > 1) { 549 printf("io_wait_run: rc == %d\n", rc); 550 printf("busy:%d aio_maxio:%d tocopy:%d\n", 551 busy, aio_maxio, tocopy); 552 } 553 } 554 555 if (leftover) { 556 /* non-sector size end of file */ 557 struct iocb *io = alloc_iocb(); 558 int rc; 559 if (zero) { 560 /* 561 * We are writing zero's to dstfd2 562 */ 563 io_prep_pwrite(io, dstfd2, io->u.c.buf, 564 leftover, offset); 565 io_set_callback(io, wr_done); 566 } else { 567 io_prep_pread(io, srcfd2, io->u.c.buf, 568 leftover, offset); 569 io_set_callback(io, rd_done); 570 } 571 rc = io_submit(myctx, 1, &io); 572 if (rc < 0) 573 io_error("io_submit", rc); 574 count_io_q_waits++; 575 rc = io_wait_run(myctx, 0); 576 if (rc < 0) 577 io_error("io_wait_run", rc); 578 } 579 580 if (srcfd != -1) 581 close(srcfd); 582 if (dstfd != -1) 583 close(dstfd); 584 exit(0); 585 } 586 587 /* 588 * Results look like: 589 * [alanm@toolbox ~/MOT3]$ ../taio -d kernel-source-2.4.8-0.4g.ppc.rpm abc 590 * rrrrrrrrrrrrrrrwwwrwrrwwrrwrwwrrwrwrwwrrwrwrrrrwwrwwwrrwrrrwwwwwwwwwwwwwwwww 591 * rrrrrrrrrrrrrrwwwrrwrwrwrwrrwwwwwwwwwwwwwwrrrrrrrrrrrrrrrrrrwwwwrwrwwrwrwrwr 592 * wrrrrrrrwwwwwwwwwwwwwrrrwrrrwrrwrwwwwwwwwwwrrrrwwrwrrrrrrrrrrrwwwwwwwwwwwrww 593 * wwwrrrrrrrrwwrrrwwrwrwrwwwrrrrrrrwwwrrwwwrrwrwwwwwwwwrrrrrrrwwwrrrrrrrwwwwww 594 * wwwwwwwrwrrrrrrrrwrrwrrwrrwrwrrrwrrrwrrrwrwwwwwwwwwwwwwwwwwwrrrwwwrrrrrrrrrr 595 * rrwrrrrrrwrrwwwwwwwwwwwwwwwwrwwwrrwrwwrrrrrrrrrrrrrrrrrrrwwwwwwwwwwwwwwwwwww 596 * rrrrrwrrwrwrwrrwrrrwwwwwwwwrrrrwrrrwrwwrwrrrwrrwrrrrwwwwwwwrwrwwwwrwwrrrwrrr 597 * rrrwwwwwwwrrrrwwrrrrrrrrrrrrwrwrrrrwwwwwwwwwwwwwwrwrrrrwwwwrwrrrrwrwwwrrrwww 598 * rwwrrrrrrrwrrrrrrrrrrrrwwwwrrrwwwrwrrwwwwwwwwwwwwwwwwwwwwwrrrrrrrwwwwwwwrw 599 */ 600 601 #else 602 int main(void) 603 { 604 fprintf(stderr, "test requires libaio and it's development packages\n"); 605 return TCONF; 606 } 607 #endif 608