Home | History | Annotate | Download | only in ltp-aiodio
      1 /*
      2  * version of copy command using async i/o
      3  * From:	Stephen Hemminger <shemminger (at) osdl.org>
      4  * Modified by Daniel McNeil <daniel (at) osdl.org> for testing aio.
      5  *	- added -a alignment
      6  *	- added -b blksize option
      7  *	- added -s size	option
      8  *	- added -f open_flag option
      9  *	- added -w (no write) option (reads from source only)
     10  *	- added -n (num aio) option
     11  *	- added -z (zero dest) option (writes zeros to dest only)
     12  *	- added -D delay_ms option
     13  *
     14  * Copy file by using a async I/O state machine.
     15  * 1. Start read request
     16  * 2. When read completes turn it into a write request
     17  * 3. When write completes decrement counter and free resources
     18  *
     19  *
     20  * Usage: aiocp [-a align] [-b blksize] [-n num_aio] [-w] [-z] [-s filesize]
     21  *		[-D delay] [-f DIRECT|TRUNC|CREAT|SYNC|LARGEFILE] src dest
     22  */
     23 
     24 #define _GNU_SOURCE
     25 
     26 #include <unistd.h>
     27 #include <stdio.h>
     28 #include <sys/types.h>
     29 #include <sys/stat.h>
     30 #include <sys/param.h>
     31 #include <fcntl.h>
     32 #include <errno.h>
     33 #include <stdlib.h>
     34 #include <mntent.h>
     35 #include <sys/select.h>
     36 #include <sys/mount.h>
     37 #include "config.h"
     38 
     39 #if HAVE_LIBAIO_H
     40 
     41 #include <libaio.h>
     42 
     43 #define AIO_BLKSIZE	(64*1024)
     44 #define AIO_MAXIO	32
     45 
     46 static int aio_blksize = AIO_BLKSIZE;
     47 static int aio_maxio = AIO_MAXIO;
     48 
     49 static int busy = 0;		// # of I/O's in flight
     50 static int tocopy = 0;		// # of blocks left to copy
     51 static int srcfd;		// source fd
     52 static int srcfd2;		// source fd - end of file non-sector
     53 static int dstfd = -1;		// destination file descriptor
     54 static int dstfd2 = -1;		// Handle end of file for non-sector size
     55 static const char *dstname = NULL;
     56 static const char *srcname = NULL;
     57 static int source_open_flag = O_RDONLY;	/* open flags on source file */
     58 static int dest_open_flag = O_WRONLY;	/* open flags on dest file */
     59 static int no_write;		/* do not write */
     60 static int zero;		/* write zero's only */
     61 
     62 static int debug;
     63 static int count_io_q_waits;	/* how many time io_queue_wait called */
     64 
     65 struct iocb **iocb_free;	/* array of pointers to iocb */
     66 int iocb_free_count;		/* current free count */
     67 int alignment = 512;		/* buffer alignment */
     68 
     69 struct timeval delay;		/* delay between i/o */
     70 
     71 static int dev_block_size_by_path(const char *path)
     72 {
     73 	FILE *f;
     74 	struct mntent *mnt;
     75 	size_t prefix_len, prefix_max = 0;
     76 	char dev_name[1024];
     77 	int fd, size;
     78 
     79 	if (!path)
     80 		return 0;
     81 
     82 	f = setmntent("/proc/mounts", "r");
     83 	if (!f) {
     84 		fprintf(stderr, "Failed to open /proc/mounts\n");
     85 		return 0;
     86 	}
     87 
     88 	while ((mnt = getmntent(f))) {
     89 		/* Skip pseudo fs */
     90 		if (mnt->mnt_fsname[0] != '/')
     91 			continue;
     92 
     93 		prefix_len = strlen(mnt->mnt_dir);
     94 
     95 		if (prefix_len > prefix_max &&
     96 		    !strncmp(path, mnt->mnt_dir, prefix_len)) {
     97 			prefix_max = prefix_len;
     98 			strncpy(dev_name, mnt->mnt_fsname, sizeof(dev_name));
     99 			dev_name[sizeof(dev_name)-1] = '\0';
    100 		}
    101 	}
    102 
    103 	endmntent(f);
    104 
    105 	if (!prefix_max) {
    106 		fprintf(stderr, "Path '%s' not found in /proc/mounts\n", path);
    107 		return 0;
    108 	}
    109 
    110 	printf("Path '%s' is on device '%s'\n", path, dev_name);
    111 
    112 	fd = open(dev_name, O_RDONLY);
    113 	if (!fd) {
    114 		fprintf(stderr, "open('%s'): %s\n", dev_name, strerror(errno));
    115 		return 0;
    116 	}
    117 
    118 	if (ioctl(fd, BLKSSZGET, &size)) {
    119 		fprintf(stderr, "ioctl(BLKSSZGET): %s\n", strerror(errno));
    120 		close(fd);
    121 		return 0;
    122 	}
    123 
    124 	close(fd);
    125 	printf("'%s' has block size %i\n", dev_name, size);
    126 
    127 	return size;
    128 }
    129 
    130 int init_iocb(int n, int iosize)
    131 {
    132 	void *buf;
    133 	int i;
    134 
    135 	if ((iocb_free = malloc(n * sizeof(struct iocb *))) == 0) {
    136 		return -1;
    137 	}
    138 
    139 	for (i = 0; i < n; i++) {
    140 		if (!
    141 		    (iocb_free[i] = malloc(sizeof(struct iocb))))
    142 			return -1;
    143 		if (posix_memalign(&buf, alignment, iosize))
    144 			return -1;
    145 		if (debug > 1) {
    146 			printf("buf allocated at 0x%p, align:%d\n",
    147 			       buf, alignment);
    148 		}
    149 		if (zero) {
    150 			/*
    151 			 * We are writing zero's to dstfd
    152 			 */
    153 			memset(buf, 0, iosize);
    154 		}
    155 		io_prep_pread(iocb_free[i], -1, buf, iosize, 0);
    156 	}
    157 	iocb_free_count = i;
    158 	return 0;
    159 }
    160 
    161 static struct iocb *alloc_iocb(void)
    162 {
    163 	if (!iocb_free_count)
    164 		return 0;
    165 	return iocb_free[--iocb_free_count];
    166 }
    167 
    168 void free_iocb(struct iocb *io)
    169 {
    170 	iocb_free[iocb_free_count++] = io;
    171 }
    172 
    173 /*
    174  * io_wait_run() - wait for an io_event and then call the callback.
    175  */
    176 int io_wait_run(io_context_t ctx, struct timespec *to)
    177 {
    178 	struct io_event events[aio_maxio];
    179 	struct io_event *ep;
    180 	int ret, n;
    181 
    182 	/*
    183 	 * get up to aio_maxio events at a time.
    184 	 */
    185 	ret = n = io_getevents(ctx, 1, aio_maxio, events, to);
    186 
    187 	/*
    188 	 * Call the callback functions for each event.
    189 	 */
    190 	for (ep = events; n-- > 0; ep++) {
    191 		io_callback_t cb = (io_callback_t) ep->data;
    192 		struct iocb *iocb = ep->obj;
    193 
    194 		if (debug > 1) {
    195 			fprintf(stderr, "ev:%p iocb:%p res:%ld res2:%ld\n",
    196 				ep, iocb, ep->res, ep->res2);
    197 		}
    198 		cb(ctx, iocb, ep->res, ep->res2);
    199 	}
    200 	return ret;
    201 }
    202 
    203 /* Fatal error handler */
    204 static void io_error(const char *func, int rc)
    205 {
    206 	if (rc == -ENOSYS)
    207 		fprintf(stderr, "AIO not in this kernel\n");
    208 	else if (rc < 0)
    209 		fprintf(stderr, "%s: %s\n", func, strerror(-rc));
    210 	else
    211 		fprintf(stderr, "%s: error %d\n", func, rc);
    212 
    213 	if (dstfd > 0)
    214 		close(dstfd);
    215 	if (dstname && dest_open_flag & O_CREAT)
    216 		unlink(dstname);
    217 	exit(1);
    218 }
    219 
    220 /*
    221  * Write complete callback.
    222  * Adjust counts and free resources
    223  */
    224 static void wr_done(io_context_t ctx, struct iocb *iocb, long res, long res2)
    225 {
    226 	if (res2 != 0) {
    227 		io_error("aio write", res2);
    228 	}
    229 	if (res != iocb->u.c.nbytes) {
    230 		fprintf(stderr, "write missed bytes expect %lu got %ld\n",
    231 			iocb->u.c.nbytes, res);
    232 		exit(1);
    233 	}
    234 	--tocopy;
    235 	--busy;
    236 	free_iocb(iocb);
    237 	if (debug)
    238 		write(2, "w", 1);
    239 }
    240 
    241 /*
    242  * Read complete callback.
    243  * Change read iocb into a write iocb and start it.
    244  */
    245 static void rd_done(io_context_t ctx, struct iocb *iocb, long res, long res2)
    246 {
    247 	/* library needs accessors to look at iocb? */
    248 	int iosize = iocb->u.c.nbytes;
    249 	char *buf = iocb->u.c.buf;
    250 	off_t offset = iocb->u.c.offset;
    251 
    252 	if (res2 != 0)
    253 		io_error("aio read", res2);
    254 	if (res != iosize) {
    255 		fprintf(stderr, "read missing bytes expect %lu got %ld\n",
    256 			iocb->u.c.nbytes, res);
    257 		exit(1);
    258 	}
    259 
    260 	/* turn read into write */
    261 	if (no_write) {
    262 		--tocopy;
    263 		--busy;
    264 		free_iocb(iocb);
    265 	} else {
    266 		int fd;
    267 		if (iocb->aio_fildes == srcfd)
    268 			fd = dstfd;
    269 		else
    270 			fd = dstfd2;
    271 		io_prep_pwrite(iocb, fd, buf, iosize, offset);
    272 		io_set_callback(iocb, wr_done);
    273 		if (1 != (res = io_submit(ctx, 1, &iocb)))
    274 			io_error("io_submit write", res);
    275 	}
    276 	if (debug)
    277 		write(2, "r", 1);
    278 	if (debug > 1)
    279 		printf("%d", iosize);
    280 }
    281 
    282 static void usage(void)
    283 {
    284 	fprintf(stderr,
    285 		"Usage: aiocp [-a align] [-s size] [-b blksize] [-n num_io]"
    286 		" [-f open_flag] SOURCE DEST\n"
    287 		"This copies from SOURCE to DEST using AIO.\n\n"
    288 		"Usage: aiocp [options] -w SOURCE\n"
    289 		"This does sequential AIO reads (no writes).\n\n"
    290 		"Usage: aiocp [options] -z DEST\n"
    291 		"This does sequential AIO writes of zeros.\n");
    292 
    293 	exit(1);
    294 }
    295 
    296 /*
    297  * Scale value by kilo, mega, or giga.
    298  */
    299 long long scale_by_kmg(long long value, char scale)
    300 {
    301 	switch (scale) {
    302 	case 'g':
    303 	case 'G':
    304 		value *= 1024;
    305 	case 'm':
    306 	case 'M':
    307 		value *= 1024;
    308 	case 'k':
    309 	case 'K':
    310 		value *= 1024;
    311 		break;
    312 	case '\0':
    313 		break;
    314 	default:
    315 		usage();
    316 		break;
    317 	}
    318 	return value;
    319 }
    320 
    321 int main(int argc, char *const *argv)
    322 {
    323 	struct stat st;
    324 	off_t length = 0, offset = 0;
    325 	off_t leftover = 0;
    326 	io_context_t myctx;
    327 	int c;
    328 	extern char *optarg;
    329 	extern int optind, opterr, optopt;
    330 
    331 	while ((c = getopt(argc, argv, "a:b:df:n:s:wzD:")) != -1) {
    332 		char *endp;
    333 
    334 		switch (c) {
    335 		case 'a':	/* alignment of data buffer */
    336 			alignment = strtol(optarg, &endp, 0);
    337 			alignment = (long)scale_by_kmg((long long)alignment,
    338 						       *endp);
    339 			break;
    340 		case 'f':	/* use these open flags */
    341 			if (strcmp(optarg, "LARGEFILE") == 0 ||
    342 			    strcmp(optarg, "O_LARGEFILE") == 0) {
    343 				source_open_flag |= O_LARGEFILE;
    344 				dest_open_flag |= O_LARGEFILE;
    345 			} else if (strcmp(optarg, "TRUNC") == 0 ||
    346 				   strcmp(optarg, "O_TRUNC") == 0) {
    347 				dest_open_flag |= O_TRUNC;
    348 			} else if (strcmp(optarg, "SYNC") == 0 ||
    349 				   strcmp(optarg, "O_SYNC") == 0) {
    350 				dest_open_flag |= O_SYNC;
    351 			} else if (strcmp(optarg, "DIRECT") == 0 ||
    352 				   strcmp(optarg, "O_DIRECT") == 0) {
    353 				source_open_flag |= O_DIRECT;
    354 				dest_open_flag |= O_DIRECT;
    355 			} else if (strncmp(optarg, "CREAT", 5) == 0 ||
    356 				   strncmp(optarg, "O_CREAT", 5) == 0) {
    357 				dest_open_flag |= O_CREAT;
    358 			}
    359 			break;
    360 		case 'd':
    361 			debug++;
    362 			break;
    363 		case 'D':
    364 			delay.tv_usec = atoi(optarg);
    365 			break;
    366 		case 'b':	/* block size */
    367 			aio_blksize = strtol(optarg, &endp, 0);
    368 			aio_blksize =
    369 			    (long)scale_by_kmg((long long)aio_blksize, *endp);
    370 			break;
    371 
    372 		case 'n':	/* num io */
    373 			aio_maxio = strtol(optarg, &endp, 0);
    374 			break;
    375 		case 's':	/* size to transfer */
    376 			length = strtoll(optarg, &endp, 0);
    377 			length = scale_by_kmg(length, *endp);
    378 			break;
    379 		case 'w':	/* no write */
    380 			no_write = 1;
    381 			break;
    382 		case 'z':	/* write zero's */
    383 			zero = 1;
    384 			break;
    385 
    386 		default:
    387 			usage();
    388 		}
    389 	}
    390 
    391 	argc -= optind;
    392 	argv += optind;
    393 
    394 	if (argc < 1) {
    395 		usage();
    396 	}
    397 	if (!zero) {
    398 		if ((srcfd = open(srcname = *argv, source_open_flag)) < 0) {
    399 			perror(srcname);
    400 			exit(1);
    401 		}
    402 		argv++;
    403 		argc--;
    404 		if (fstat(srcfd, &st) < 0) {
    405 			perror("fstat");
    406 			exit(1);
    407 		}
    408 		if (length == 0)
    409 			length = st.st_size;
    410 	}
    411 
    412 	if (!no_write) {
    413 		/*
    414 		 * We are either copying or writing zeros to dstname
    415 		 */
    416 		if (argc < 1) {
    417 			usage();
    418 		}
    419 		if ((dstfd = open(dstname = *argv, dest_open_flag, 0666)) < 0) {
    420 			perror(dstname);
    421 			exit(1);
    422 		}
    423 		if (zero) {
    424 			/*
    425 			 * get size of dest, if we are zeroing it.
    426 			 * TODO: handle devices.
    427 			 */
    428 			if (fstat(dstfd, &st) < 0) {
    429 				perror("fstat");
    430 				exit(1);
    431 			}
    432 			if (length == 0)
    433 				length = st.st_size;
    434 		}
    435 	}
    436 	/*
    437 	 * O_DIRECT cannot handle non-sector sizes
    438 	 */
    439 	if (dest_open_flag & O_DIRECT) {
    440 		int src_alignment = dev_block_size_by_path(srcname);
    441 		int dst_alignment = dev_block_size_by_path(dstname);
    442 
    443 		/*
    444 		 * Given we expect the block sizes to be multiple of 2 the
    445 		 * larger is always divideable by the smaller, so we only need
    446 		 * to care about maximum.
    447 		 */
    448 		if (src_alignment > dst_alignment)
    449 			dst_alignment = src_alignment;
    450 
    451 		if (alignment < dst_alignment) {
    452 			alignment = dst_alignment;
    453 			printf("Forcing aligment to %i\n", alignment);
    454 		}
    455 
    456 		if (aio_blksize % alignment) {
    457 			printf("Block size is not multiple of drive block size\n");
    458 			printf("Skipping the test!\n");
    459 			exit(0);
    460 		}
    461 
    462 		leftover = length % alignment;
    463 		if (leftover) {
    464 			int flag;
    465 
    466 			length -= leftover;
    467 			if (!zero) {
    468 				flag = source_open_flag & ~O_DIRECT;
    469 				srcfd2 = open(srcname, flag);
    470 				if (srcfd2 < 0) {
    471 					perror(srcname);
    472 					exit(1);
    473 				}
    474 			}
    475 			if (!no_write) {
    476 				flag = (O_SYNC | dest_open_flag) &
    477 				    ~(O_DIRECT | O_CREAT);
    478 				dstfd2 = open(dstname, flag);
    479 				if (dstfd2 < 0) {
    480 					perror(dstname);
    481 					exit(1);
    482 				}
    483 			}
    484 		}
    485 	}
    486 
    487 	/* initialize state machine */
    488 	memset(&myctx, 0, sizeof(myctx));
    489 	io_queue_init(aio_maxio, &myctx);
    490 	tocopy = howmany(length, aio_blksize);
    491 
    492 	if (init_iocb(aio_maxio, aio_blksize) < 0) {
    493 		fprintf(stderr, "Error allocating the i/o buffers\n");
    494 		exit(1);
    495 	}
    496 
    497 	while (tocopy > 0) {
    498 		int i, rc;
    499 		/* Submit as many reads as once as possible upto aio_maxio */
    500 		int n = MIN(MIN(aio_maxio - busy, aio_maxio),
    501 			    howmany(length - offset, aio_blksize));
    502 		if (n > 0) {
    503 			struct iocb *ioq[n];
    504 
    505 			for (i = 0; i < n; i++) {
    506 				struct iocb *io = alloc_iocb();
    507 				int iosize = MIN(length - offset, aio_blksize);
    508 
    509 				if (zero) {
    510 					/*
    511 					 * We are writing zero's to dstfd
    512 					 */
    513 					io_prep_pwrite(io, dstfd, io->u.c.buf,
    514 						       iosize, offset);
    515 					io_set_callback(io, wr_done);
    516 				} else {
    517 					io_prep_pread(io, srcfd, io->u.c.buf,
    518 						      iosize, offset);
    519 					io_set_callback(io, rd_done);
    520 				}
    521 				ioq[i] = io;
    522 				offset += iosize;
    523 			}
    524 
    525 			rc = io_submit(myctx, n, ioq);
    526 			if (rc < 0)
    527 				io_error("io_submit", rc);
    528 
    529 			busy += n;
    530 			if (debug > 1)
    531 				printf("io_submit(%d) busy:%d\n", n, busy);
    532 			if (delay.tv_usec) {
    533 				struct timeval t = delay;
    534 				(void)select(0, 0, 0, 0, &t);
    535 			}
    536 		}
    537 
    538 		/*
    539 		 * We have submitted all the i/o requests. Wait for at least one to complete
    540 		 * and call the callbacks.
    541 		 */
    542 		count_io_q_waits++;
    543 		rc = io_wait_run(myctx, 0);
    544 		if (rc < 0)
    545 			io_error("io_wait_run", rc);
    546 
    547 		if (debug > 1) {
    548 			printf("io_wait_run: rc == %d\n", rc);
    549 			printf("busy:%d aio_maxio:%d tocopy:%d\n",
    550 			       busy, aio_maxio, tocopy);
    551 		}
    552 	}
    553 
    554 	if (leftover) {
    555 		/* non-sector size end of file */
    556 		struct iocb *io = alloc_iocb();
    557 		int rc;
    558 		if (zero) {
    559 			/*
    560 			 * We are writing zero's to dstfd2
    561 			 */
    562 			io_prep_pwrite(io, dstfd2, io->u.c.buf,
    563 				       leftover, offset);
    564 			io_set_callback(io, wr_done);
    565 		} else {
    566 			io_prep_pread(io, srcfd2, io->u.c.buf,
    567 				      leftover, offset);
    568 			io_set_callback(io, rd_done);
    569 		}
    570 		rc = io_submit(myctx, 1, &io);
    571 		if (rc < 0)
    572 			io_error("io_submit", rc);
    573 		count_io_q_waits++;
    574 		rc = io_wait_run(myctx, 0);
    575 		if (rc < 0)
    576 			io_error("io_wait_run", rc);
    577 	}
    578 
    579 	if (srcfd != -1)
    580 		close(srcfd);
    581 	if (dstfd != -1)
    582 		close(dstfd);
    583 	exit(0);
    584 }
    585 
    586 /*
    587  * Results look like:
    588  * [alanm@toolbox ~/MOT3]$ ../taio -d kernel-source-2.4.8-0.4g.ppc.rpm abc
    589  * rrrrrrrrrrrrrrrwwwrwrrwwrrwrwwrrwrwrwwrrwrwrrrrwwrwwwrrwrrrwwwwwwwwwwwwwwwww
    590  * rrrrrrrrrrrrrrwwwrrwrwrwrwrrwwwwwwwwwwwwwwrrrrrrrrrrrrrrrrrrwwwwrwrwwrwrwrwr
    591  * wrrrrrrrwwwwwwwwwwwwwrrrwrrrwrrwrwwwwwwwwwwrrrrwwrwrrrrrrrrrrrwwwwwwwwwwwrww
    592  * wwwrrrrrrrrwwrrrwwrwrwrwwwrrrrrrrwwwrrwwwrrwrwwwwwwwwrrrrrrrwwwrrrrrrrwwwwww
    593  * wwwwwwwrwrrrrrrrrwrrwrrwrrwrwrrrwrrrwrrrwrwwwwwwwwwwwwwwwwwwrrrwwwrrrrrrrrrr
    594  * rrwrrrrrrwrrwwwwwwwwwwwwwwwwrwwwrrwrwwrrrrrrrrrrrrrrrrrrrwwwwwwwwwwwwwwwwwww
    595  * rrrrrwrrwrwrwrrwrrrwwwwwwwwrrrrwrrrwrwwrwrrrwrrwrrrrwwwwwwwrwrwwwwrwwrrrwrrr
    596  * rrrwwwwwwwrrrrwwrrrrrrrrrrrrwrwrrrrwwwwwwwwwwwwwwrwrrrrwwwwrwrrrrwrwwwrrrwww
    597  * rwwrrrrrrrwrrrrrrrrrrrrwwwwrrrwwwrwrrwwwwwwwwwwwwwwwwwwwwwrrrrrrrwwwwwwwrw
    598  */
    599 
    600 #else
    601 
    602 int main(void)
    603 {
    604 	fprintf(stderr, "System doesn't have libaio support.\n");
    605 	return 1;
    606 }
    607 
    608 #endif
    609