Home | History | Annotate | Download | only in engines
      1 /*
      2  * splice engine
      3  *
      4  * IO engine that transfers data by doing splices to/from pipes and
      5  * the files.
      6  *
      7  */
      8 #include <stdio.h>
      9 #include <stdlib.h>
     10 #include <unistd.h>
     11 #include <errno.h>
     12 #include <assert.h>
     13 #include <sys/poll.h>
     14 #include <sys/mman.h>
     15 
     16 #include "../fio.h"
     17 
     18 struct spliceio_data {
     19 	int pipe[2];
     20 	int vmsplice_to_user;
     21 	int vmsplice_to_user_map;
     22 };
     23 
     24 /*
     25  * vmsplice didn't use to support splicing to user space, this is the old
     26  * variant of getting that job done. Doesn't make a lot of sense, but it
     27  * uses splices to move data from the source into a pipe.
     28  */
     29 static int fio_splice_read_old(struct thread_data *td, struct io_u *io_u)
     30 {
     31 	struct spliceio_data *sd = td->io_ops_data;
     32 	struct fio_file *f = io_u->file;
     33 	int ret, ret2, buflen;
     34 	off_t offset;
     35 	void *p;
     36 
     37 	offset = io_u->offset;
     38 	buflen = io_u->xfer_buflen;
     39 	p = io_u->xfer_buf;
     40 	while (buflen) {
     41 		int this_len = buflen;
     42 
     43 		if (this_len > SPLICE_DEF_SIZE)
     44 			this_len = SPLICE_DEF_SIZE;
     45 
     46 		ret = splice(f->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
     47 		if (ret < 0) {
     48 			if (errno == ENODATA || errno == EAGAIN)
     49 				continue;
     50 
     51 			return -errno;
     52 		}
     53 
     54 		buflen -= ret;
     55 
     56 		while (ret) {
     57 			ret2 = read(sd->pipe[0], p, ret);
     58 			if (ret2 < 0)
     59 				return -errno;
     60 
     61 			ret -= ret2;
     62 			p += ret2;
     63 		}
     64 	}
     65 
     66 	return io_u->xfer_buflen;
     67 }
     68 
     69 /*
     70  * We can now vmsplice into userspace, so do the transfer by splicing into
     71  * a pipe and vmsplicing that into userspace.
     72  */
     73 static int fio_splice_read(struct thread_data *td, struct io_u *io_u)
     74 {
     75 	struct spliceio_data *sd = td->io_ops_data;
     76 	struct fio_file *f = io_u->file;
     77 	struct iovec iov;
     78 	int ret , buflen, mmap_len;
     79 	off_t offset;
     80 	void *p, *map;
     81 
     82 	ret = 0;
     83 	offset = io_u->offset;
     84 	mmap_len = buflen = io_u->xfer_buflen;
     85 
     86 	if (sd->vmsplice_to_user_map) {
     87 		map = mmap(io_u->xfer_buf, buflen, PROT_READ, MAP_PRIVATE|OS_MAP_ANON, 0, 0);
     88 		if (map == MAP_FAILED) {
     89 			td_verror(td, errno, "mmap io_u");
     90 			return -1;
     91 		}
     92 
     93 		p = map;
     94 	} else {
     95 		map = NULL;
     96 		p = io_u->xfer_buf;
     97 	}
     98 
     99 	while (buflen) {
    100 		int this_len = buflen;
    101 		int flags = 0;
    102 
    103 		if (this_len > SPLICE_DEF_SIZE) {
    104 			this_len = SPLICE_DEF_SIZE;
    105 			flags = SPLICE_F_MORE;
    106 		}
    107 
    108 		ret = splice(f->fd, &offset, sd->pipe[1], NULL, this_len,flags);
    109 		if (ret < 0) {
    110 			if (errno == ENODATA || errno == EAGAIN)
    111 				continue;
    112 
    113 			td_verror(td, errno, "splice-from-fd");
    114 			break;
    115 		}
    116 
    117 		buflen -= ret;
    118 		iov.iov_base = p;
    119 		iov.iov_len = ret;
    120 
    121 		while (iov.iov_len) {
    122 			ret = vmsplice(sd->pipe[0], &iov, 1, SPLICE_F_MOVE);
    123 			if (ret < 0) {
    124 				if (errno == EFAULT &&
    125 				    sd->vmsplice_to_user_map) {
    126 					sd->vmsplice_to_user_map = 0;
    127 					munmap(map, mmap_len);
    128 					map = NULL;
    129 					p = io_u->xfer_buf;
    130 					iov.iov_base = p;
    131 					continue;
    132 				}
    133 				if (errno == EBADF) {
    134 					ret = -EBADF;
    135 					break;
    136 				}
    137 				td_verror(td, errno, "vmsplice");
    138 				break;
    139 			} else if (!ret) {
    140 				td_verror(td, ENODATA, "vmsplice");
    141 				ret = -1;
    142 				break;
    143 			}
    144 
    145 			iov.iov_len -= ret;
    146 			iov.iov_base += ret;
    147 			p += ret;
    148 		}
    149 		if (ret < 0)
    150 			break;
    151 	}
    152 
    153 	if (sd->vmsplice_to_user_map && munmap(map, mmap_len) < 0) {
    154 		td_verror(td, errno, "munnap io_u");
    155 		return -1;
    156 	}
    157 	if (ret < 0)
    158 		return ret;
    159 
    160 	return io_u->xfer_buflen;
    161 }
    162 
    163 /*
    164  * For splice writing, we can vmsplice our data buffer directly into a
    165  * pipe and then splice that to a file.
    166  */
    167 static int fio_splice_write(struct thread_data *td, struct io_u *io_u)
    168 {
    169 	struct spliceio_data *sd = td->io_ops_data;
    170 	struct iovec iov = {
    171 		.iov_base = io_u->xfer_buf,
    172 		.iov_len = io_u->xfer_buflen,
    173 	};
    174 	struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
    175 	struct fio_file *f = io_u->file;
    176 	off_t off = io_u->offset;
    177 	int ret, ret2;
    178 
    179 	while (iov.iov_len) {
    180 		if (poll(&pfd, 1, -1) < 0)
    181 			return errno;
    182 
    183 		ret = vmsplice(sd->pipe[1], &iov, 1, SPLICE_F_NONBLOCK);
    184 		if (ret < 0)
    185 			return -errno;
    186 
    187 		iov.iov_len -= ret;
    188 		iov.iov_base += ret;
    189 
    190 		while (ret) {
    191 			ret2 = splice(sd->pipe[0], NULL, f->fd, &off, ret, 0);
    192 			if (ret2 < 0)
    193 				return -errno;
    194 
    195 			ret -= ret2;
    196 		}
    197 	}
    198 
    199 	return io_u->xfer_buflen;
    200 }
    201 
    202 static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
    203 {
    204 	struct spliceio_data *sd = td->io_ops_data;
    205 	int ret = 0;
    206 
    207 	fio_ro_check(td, io_u);
    208 
    209 	if (io_u->ddir == DDIR_READ) {
    210 		if (sd->vmsplice_to_user) {
    211 			ret = fio_splice_read(td, io_u);
    212 			/*
    213 			 * This kernel doesn't support vmsplice to user
    214 			 * space. Reset the vmsplice_to_user flag, so that
    215 			 * we retry below and don't hit this path again.
    216 			 */
    217 			if (ret == -EBADF)
    218 				sd->vmsplice_to_user = 0;
    219 		}
    220 		if (!sd->vmsplice_to_user)
    221 			ret = fio_splice_read_old(td, io_u);
    222 	} else if (io_u->ddir == DDIR_WRITE)
    223 		ret = fio_splice_write(td, io_u);
    224 	else if (io_u->ddir == DDIR_TRIM)
    225 		ret = do_io_u_trim(td, io_u);
    226 	else
    227 		ret = do_io_u_sync(td, io_u);
    228 
    229 	if (ret != (int) io_u->xfer_buflen) {
    230 		if (ret >= 0) {
    231 			io_u->resid = io_u->xfer_buflen - ret;
    232 			io_u->error = 0;
    233 			return FIO_Q_COMPLETED;
    234 		} else
    235 			io_u->error = errno;
    236 	}
    237 
    238 	if (io_u->error) {
    239 		td_verror(td, io_u->error, "xfer");
    240 		if (io_u->error == EINVAL)
    241 			log_err("fio: looks like splice doesn't work on this"
    242 					" file system\n");
    243 	}
    244 
    245 	return FIO_Q_COMPLETED;
    246 }
    247 
    248 static void fio_spliceio_cleanup(struct thread_data *td)
    249 {
    250 	struct spliceio_data *sd = td->io_ops_data;
    251 
    252 	if (sd) {
    253 		close(sd->pipe[0]);
    254 		close(sd->pipe[1]);
    255 		free(sd);
    256 	}
    257 }
    258 
    259 static int fio_spliceio_init(struct thread_data *td)
    260 {
    261 	struct spliceio_data *sd = malloc(sizeof(*sd));
    262 
    263 	if (pipe(sd->pipe) < 0) {
    264 		td_verror(td, errno, "pipe");
    265 		free(sd);
    266 		return 1;
    267 	}
    268 
    269 	/*
    270 	 * Assume this work, we'll reset this if it doesn't
    271 	 */
    272 	sd->vmsplice_to_user = 1;
    273 
    274 	/*
    275 	 * Works with "real" vmsplice to user, eg mapping pages directly.
    276 	 * Reset if we fail.
    277 	 */
    278 	sd->vmsplice_to_user_map = 1;
    279 
    280 	/*
    281 	 * And if vmsplice_to_user works, we definitely need aligned
    282 	 * buffers. Just set ->odirect to force that.
    283 	 */
    284 	if (td_read(td))
    285 		td->o.mem_align = 1;
    286 
    287 	td->io_ops_data = sd;
    288 	return 0;
    289 }
    290 
    291 static struct ioengine_ops ioengine = {
    292 	.name		= "splice",
    293 	.version	= FIO_IOOPS_VERSION,
    294 	.init		= fio_spliceio_init,
    295 	.queue		= fio_spliceio_queue,
    296 	.cleanup	= fio_spliceio_cleanup,
    297 	.open_file	= generic_open_file,
    298 	.close_file	= generic_close_file,
    299 	.get_file_size	= generic_get_file_size,
    300 	.flags		= FIO_SYNCIO | FIO_PIPEIO,
    301 };
    302 
    303 static void fio_init fio_spliceio_register(void)
    304 {
    305 	register_ioengine(&ioengine);
    306 }
    307 
    308 static void fio_exit fio_spliceio_unregister(void)
    309 {
    310 	unregister_ioengine(&ioengine);
    311 }
    312