1 /* 2 * splice engine 3 * 4 * IO engine that transfers data by doing splices to/from pipes and 5 * the files. 6 * 7 */ 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <unistd.h> 11 #include <errno.h> 12 #include <assert.h> 13 #include <sys/poll.h> 14 #include <sys/mman.h> 15 16 #include "../fio.h" 17 18 struct spliceio_data { 19 int pipe[2]; 20 int vmsplice_to_user; 21 int vmsplice_to_user_map; 22 }; 23 24 /* 25 * vmsplice didn't use to support splicing to user space, this is the old 26 * variant of getting that job done. Doesn't make a lot of sense, but it 27 * uses splices to move data from the source into a pipe. 28 */ 29 static int fio_splice_read_old(struct thread_data *td, struct io_u *io_u) 30 { 31 struct spliceio_data *sd = td->io_ops_data; 32 struct fio_file *f = io_u->file; 33 int ret, ret2, buflen; 34 off_t offset; 35 void *p; 36 37 offset = io_u->offset; 38 buflen = io_u->xfer_buflen; 39 p = io_u->xfer_buf; 40 while (buflen) { 41 int this_len = buflen; 42 43 if (this_len > SPLICE_DEF_SIZE) 44 this_len = SPLICE_DEF_SIZE; 45 46 ret = splice(f->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE); 47 if (ret < 0) { 48 if (errno == ENODATA || errno == EAGAIN) 49 continue; 50 51 return -errno; 52 } 53 54 buflen -= ret; 55 56 while (ret) { 57 ret2 = read(sd->pipe[0], p, ret); 58 if (ret2 < 0) 59 return -errno; 60 61 ret -= ret2; 62 p += ret2; 63 } 64 } 65 66 return io_u->xfer_buflen; 67 } 68 69 /* 70 * We can now vmsplice into userspace, so do the transfer by splicing into 71 * a pipe and vmsplicing that into userspace. 72 */ 73 static int fio_splice_read(struct thread_data *td, struct io_u *io_u) 74 { 75 struct spliceio_data *sd = td->io_ops_data; 76 struct fio_file *f = io_u->file; 77 struct iovec iov; 78 int ret , buflen, mmap_len; 79 off_t offset; 80 void *p, *map; 81 82 ret = 0; 83 offset = io_u->offset; 84 mmap_len = buflen = io_u->xfer_buflen; 85 86 if (sd->vmsplice_to_user_map) { 87 map = mmap(io_u->xfer_buf, buflen, PROT_READ, MAP_PRIVATE|OS_MAP_ANON, 0, 0); 88 if (map == MAP_FAILED) { 89 td_verror(td, errno, "mmap io_u"); 90 return -1; 91 } 92 93 p = map; 94 } else { 95 map = NULL; 96 p = io_u->xfer_buf; 97 } 98 99 while (buflen) { 100 int this_len = buflen; 101 int flags = 0; 102 103 if (this_len > SPLICE_DEF_SIZE) { 104 this_len = SPLICE_DEF_SIZE; 105 flags = SPLICE_F_MORE; 106 } 107 108 ret = splice(f->fd, &offset, sd->pipe[1], NULL, this_len,flags); 109 if (ret < 0) { 110 if (errno == ENODATA || errno == EAGAIN) 111 continue; 112 113 td_verror(td, errno, "splice-from-fd"); 114 break; 115 } 116 117 buflen -= ret; 118 iov.iov_base = p; 119 iov.iov_len = ret; 120 121 while (iov.iov_len) { 122 ret = vmsplice(sd->pipe[0], &iov, 1, SPLICE_F_MOVE); 123 if (ret < 0) { 124 if (errno == EFAULT && 125 sd->vmsplice_to_user_map) { 126 sd->vmsplice_to_user_map = 0; 127 munmap(map, mmap_len); 128 map = NULL; 129 p = io_u->xfer_buf; 130 iov.iov_base = p; 131 continue; 132 } 133 if (errno == EBADF) { 134 ret = -EBADF; 135 break; 136 } 137 td_verror(td, errno, "vmsplice"); 138 break; 139 } else if (!ret) { 140 td_verror(td, ENODATA, "vmsplice"); 141 ret = -1; 142 break; 143 } 144 145 iov.iov_len -= ret; 146 iov.iov_base += ret; 147 p += ret; 148 } 149 if (ret < 0) 150 break; 151 } 152 153 if (sd->vmsplice_to_user_map && munmap(map, mmap_len) < 0) { 154 td_verror(td, errno, "munnap io_u"); 155 return -1; 156 } 157 if (ret < 0) 158 return ret; 159 160 return io_u->xfer_buflen; 161 } 162 163 /* 164 * For splice writing, we can vmsplice our data buffer directly into a 165 * pipe and then splice that to a file. 166 */ 167 static int fio_splice_write(struct thread_data *td, struct io_u *io_u) 168 { 169 struct spliceio_data *sd = td->io_ops_data; 170 struct iovec iov = { 171 .iov_base = io_u->xfer_buf, 172 .iov_len = io_u->xfer_buflen, 173 }; 174 struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, }; 175 struct fio_file *f = io_u->file; 176 off_t off = io_u->offset; 177 int ret, ret2; 178 179 while (iov.iov_len) { 180 if (poll(&pfd, 1, -1) < 0) 181 return errno; 182 183 ret = vmsplice(sd->pipe[1], &iov, 1, SPLICE_F_NONBLOCK); 184 if (ret < 0) 185 return -errno; 186 187 iov.iov_len -= ret; 188 iov.iov_base += ret; 189 190 while (ret) { 191 ret2 = splice(sd->pipe[0], NULL, f->fd, &off, ret, 0); 192 if (ret2 < 0) 193 return -errno; 194 195 ret -= ret2; 196 } 197 } 198 199 return io_u->xfer_buflen; 200 } 201 202 static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u) 203 { 204 struct spliceio_data *sd = td->io_ops_data; 205 int ret = 0; 206 207 fio_ro_check(td, io_u); 208 209 if (io_u->ddir == DDIR_READ) { 210 if (sd->vmsplice_to_user) { 211 ret = fio_splice_read(td, io_u); 212 /* 213 * This kernel doesn't support vmsplice to user 214 * space. Reset the vmsplice_to_user flag, so that 215 * we retry below and don't hit this path again. 216 */ 217 if (ret == -EBADF) 218 sd->vmsplice_to_user = 0; 219 } 220 if (!sd->vmsplice_to_user) 221 ret = fio_splice_read_old(td, io_u); 222 } else if (io_u->ddir == DDIR_WRITE) 223 ret = fio_splice_write(td, io_u); 224 else if (io_u->ddir == DDIR_TRIM) 225 ret = do_io_u_trim(td, io_u); 226 else 227 ret = do_io_u_sync(td, io_u); 228 229 if (ret != (int) io_u->xfer_buflen) { 230 if (ret >= 0) { 231 io_u->resid = io_u->xfer_buflen - ret; 232 io_u->error = 0; 233 return FIO_Q_COMPLETED; 234 } else 235 io_u->error = errno; 236 } 237 238 if (io_u->error) { 239 td_verror(td, io_u->error, "xfer"); 240 if (io_u->error == EINVAL) 241 log_err("fio: looks like splice doesn't work on this" 242 " file system\n"); 243 } 244 245 return FIO_Q_COMPLETED; 246 } 247 248 static void fio_spliceio_cleanup(struct thread_data *td) 249 { 250 struct spliceio_data *sd = td->io_ops_data; 251 252 if (sd) { 253 close(sd->pipe[0]); 254 close(sd->pipe[1]); 255 free(sd); 256 } 257 } 258 259 static int fio_spliceio_init(struct thread_data *td) 260 { 261 struct spliceio_data *sd = malloc(sizeof(*sd)); 262 263 if (pipe(sd->pipe) < 0) { 264 td_verror(td, errno, "pipe"); 265 free(sd); 266 return 1; 267 } 268 269 /* 270 * Assume this work, we'll reset this if it doesn't 271 */ 272 sd->vmsplice_to_user = 1; 273 274 /* 275 * Works with "real" vmsplice to user, eg mapping pages directly. 276 * Reset if we fail. 277 */ 278 sd->vmsplice_to_user_map = 1; 279 280 /* 281 * And if vmsplice_to_user works, we definitely need aligned 282 * buffers. Just set ->odirect to force that. 283 */ 284 if (td_read(td)) 285 td->o.mem_align = 1; 286 287 td->io_ops_data = sd; 288 return 0; 289 } 290 291 static struct ioengine_ops ioengine = { 292 .name = "splice", 293 .version = FIO_IOOPS_VERSION, 294 .init = fio_spliceio_init, 295 .queue = fio_spliceio_queue, 296 .cleanup = fio_spliceio_cleanup, 297 .open_file = generic_open_file, 298 .close_file = generic_close_file, 299 .get_file_size = generic_get_file_size, 300 .flags = FIO_SYNCIO | FIO_PIPEIO, 301 }; 302 303 static void fio_init fio_spliceio_register(void) 304 { 305 register_ioengine(&ioengine); 306 } 307 308 static void fio_exit fio_spliceio_unregister(void) 309 { 310 unregister_ioengine(&ioengine); 311 } 312