1 /* 2 * device DAX engine 3 * 4 * IO engine that reads/writes from files by doing memcpy to/from 5 * a memory mapped region of DAX enabled device. 6 * 7 * Copyright (C) 2016 Intel Corp 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License, 11 * version 2 as published by the Free Software Foundation.. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 */ 19 20 /* 21 * device dax engine 22 * IO engine that access a DAX device directly for read and write data 23 * 24 * To use: 25 * ioengine=dev-dax 26 * 27 * Other relevant settings: 28 * iodepth=1 29 * direct=0 REQUIRED 30 * filename=/dev/daxN.N 31 * bs=2m 32 * 33 * direct should be left to 0. Using dev-dax implies that memory access 34 * is direct. However, dev-dax does not support O_DIRECT flag by design 35 * since it is not necessary. 36 * 37 * bs should adhere to the device dax alignment at minimally. 38 * 39 * libpmem.so 40 * By default, the dev-dax engine will let the system find the libpmem.so 41 * that it uses. You can use an alternative libpmem by setting the 42 * FIO_PMEM_LIB environment variable to the full path to the desired 43 * libpmem.so. 44 */ 45 46 #include <stdio.h> 47 #include <limits.h> 48 #include <stdlib.h> 49 #include <unistd.h> 50 #include <errno.h> 51 #include <sys/mman.h> 52 #include <sys/stat.h> 53 #include <sys/sysmacros.h> 54 #include <libgen.h> 55 #include <libpmem.h> 56 57 #include "../fio.h" 58 #include "../verify.h" 59 60 /* 61 * Limits us to 1GiB of mapped files in total to model after 62 * mmap engine behavior 63 */ 64 #define MMAP_TOTAL_SZ (1 * 1024 * 1024 * 1024UL) 65 66 struct fio_devdax_data { 67 void *devdax_ptr; 68 size_t devdax_sz; 69 off_t devdax_off; 70 }; 71 72 static int fio_devdax_file(struct thread_data *td, struct fio_file *f, 73 size_t length, off_t off) 74 { 75 struct fio_devdax_data *fdd = FILE_ENG_DATA(f); 76 int flags = 0; 77 78 if (td_rw(td)) 79 flags = PROT_READ | PROT_WRITE; 80 else if (td_write(td)) { 81 flags = PROT_WRITE; 82 83 if (td->o.verify != VERIFY_NONE) 84 flags |= PROT_READ; 85 } else 86 flags = PROT_READ; 87 88 fdd->devdax_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off); 89 if (fdd->devdax_ptr == MAP_FAILED) { 90 fdd->devdax_ptr = NULL; 91 td_verror(td, errno, "mmap"); 92 } 93 94 if (td->error && fdd->devdax_ptr) 95 munmap(fdd->devdax_ptr, length); 96 97 return td->error; 98 } 99 100 /* 101 * Just mmap an appropriate portion, we cannot mmap the full extent 102 */ 103 static int fio_devdax_prep_limited(struct thread_data *td, struct io_u *io_u) 104 { 105 struct fio_file *f = io_u->file; 106 struct fio_devdax_data *fdd = FILE_ENG_DATA(f); 107 108 if (io_u->buflen > f->real_file_size) { 109 log_err("dev-dax: bs too big for dev-dax engine\n"); 110 return EIO; 111 } 112 113 fdd->devdax_sz = min(MMAP_TOTAL_SZ, f->real_file_size); 114 if (fdd->devdax_sz > f->io_size) 115 fdd->devdax_sz = f->io_size; 116 117 fdd->devdax_off = io_u->offset; 118 119 return fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off); 120 } 121 122 /* 123 * Attempt to mmap the entire file 124 */ 125 static int fio_devdax_prep_full(struct thread_data *td, struct io_u *io_u) 126 { 127 struct fio_file *f = io_u->file; 128 struct fio_devdax_data *fdd = FILE_ENG_DATA(f); 129 int ret; 130 131 if (fio_file_partial_mmap(f)) 132 return EINVAL; 133 134 if (io_u->offset != (size_t) io_u->offset || 135 f->io_size != (size_t) f->io_size) { 136 fio_file_set_partial_mmap(f); 137 return EINVAL; 138 } 139 140 fdd->devdax_sz = f->io_size; 141 fdd->devdax_off = 0; 142 143 ret = fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off); 144 if (ret) 145 fio_file_set_partial_mmap(f); 146 147 return ret; 148 } 149 150 static int fio_devdax_prep(struct thread_data *td, struct io_u *io_u) 151 { 152 struct fio_file *f = io_u->file; 153 struct fio_devdax_data *fdd = FILE_ENG_DATA(f); 154 int ret; 155 156 /* 157 * It fits within existing mapping, use it 158 */ 159 if (io_u->offset >= fdd->devdax_off && 160 io_u->offset + io_u->buflen < fdd->devdax_off + fdd->devdax_sz) 161 goto done; 162 163 /* 164 * unmap any existing mapping 165 */ 166 if (fdd->devdax_ptr) { 167 if (munmap(fdd->devdax_ptr, fdd->devdax_sz) < 0) 168 return errno; 169 fdd->devdax_ptr = NULL; 170 } 171 172 if (fio_devdax_prep_full(td, io_u)) { 173 td_clear_error(td); 174 ret = fio_devdax_prep_limited(td, io_u); 175 if (ret) 176 return ret; 177 } 178 179 done: 180 io_u->mmap_data = fdd->devdax_ptr + io_u->offset - fdd->devdax_off - 181 f->file_offset; 182 return 0; 183 } 184 185 static int fio_devdax_queue(struct thread_data *td, struct io_u *io_u) 186 { 187 fio_ro_check(td, io_u); 188 io_u->error = 0; 189 190 switch (io_u->ddir) { 191 case DDIR_READ: 192 memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen); 193 break; 194 case DDIR_WRITE: 195 pmem_memcpy_persist(io_u->mmap_data, io_u->xfer_buf, 196 io_u->xfer_buflen); 197 break; 198 case DDIR_SYNC: 199 case DDIR_DATASYNC: 200 case DDIR_SYNC_FILE_RANGE: 201 break; 202 default: 203 io_u->error = EINVAL; 204 break; 205 } 206 207 return FIO_Q_COMPLETED; 208 } 209 210 static int fio_devdax_init(struct thread_data *td) 211 { 212 struct thread_options *o = &td->o; 213 214 if ((o->rw_min_bs & page_mask) && 215 (o->fsync_blocks || o->fdatasync_blocks)) { 216 log_err("dev-dax: mmap options dictate a minimum block size of %llu bytes\n", 217 (unsigned long long) page_size); 218 return 1; 219 } 220 221 return 0; 222 } 223 224 static int fio_devdax_open_file(struct thread_data *td, struct fio_file *f) 225 { 226 struct fio_devdax_data *fdd; 227 int ret; 228 229 ret = generic_open_file(td, f); 230 if (ret) 231 return ret; 232 233 fdd = calloc(1, sizeof(*fdd)); 234 if (!fdd) { 235 int fio_unused __ret; 236 __ret = generic_close_file(td, f); 237 return 1; 238 } 239 240 FILE_SET_ENG_DATA(f, fdd); 241 242 return 0; 243 } 244 245 static int fio_devdax_close_file(struct thread_data *td, struct fio_file *f) 246 { 247 struct fio_devdax_data *fdd = FILE_ENG_DATA(f); 248 249 FILE_SET_ENG_DATA(f, NULL); 250 free(fdd); 251 fio_file_clear_partial_mmap(f); 252 253 return generic_close_file(td, f); 254 } 255 256 static int 257 fio_devdax_get_file_size(struct thread_data *td, struct fio_file *f) 258 { 259 char spath[PATH_MAX]; 260 char npath[PATH_MAX]; 261 char *rpath; 262 FILE *sfile; 263 uint64_t size; 264 struct stat st; 265 int rc; 266 267 if (fio_file_size_known(f)) 268 return 0; 269 270 if (f->filetype != FIO_TYPE_CHAR) 271 return -EINVAL; 272 273 rc = stat(f->file_name, &st); 274 if (rc < 0) { 275 log_err("%s: failed to stat file %s (%s)\n", 276 td->o.name, f->file_name, strerror(errno)); 277 return -errno; 278 } 279 280 snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/subsystem", 281 major(st.st_rdev), minor(st.st_rdev)); 282 283 rpath = realpath(spath, npath); 284 if (!rpath) { 285 log_err("%s: realpath on %s failed (%s)\n", 286 td->o.name, spath, strerror(errno)); 287 return -errno; 288 } 289 290 /* check if DAX device */ 291 if (strcmp("/sys/class/dax", rpath)) { 292 log_err("%s: %s not a DAX device!\n", 293 td->o.name, f->file_name); 294 } 295 296 snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/size", 297 major(st.st_rdev), minor(st.st_rdev)); 298 299 sfile = fopen(spath, "r"); 300 if (!sfile) { 301 log_err("%s: fopen on %s failed (%s)\n", 302 td->o.name, spath, strerror(errno)); 303 return 1; 304 } 305 306 rc = fscanf(sfile, "%lu", &size); 307 if (rc < 0) { 308 log_err("%s: fscanf on %s failed (%s)\n", 309 td->o.name, spath, strerror(errno)); 310 return 1; 311 } 312 313 f->real_file_size = size; 314 315 fclose(sfile); 316 317 if (f->file_offset > f->real_file_size) { 318 log_err("%s: offset extends end (%llu > %llu)\n", td->o.name, 319 (unsigned long long) f->file_offset, 320 (unsigned long long) f->real_file_size); 321 return 1; 322 } 323 324 fio_file_set_size_known(f); 325 return 0; 326 } 327 328 static struct ioengine_ops ioengine = { 329 .name = "dev-dax", 330 .version = FIO_IOOPS_VERSION, 331 .init = fio_devdax_init, 332 .prep = fio_devdax_prep, 333 .queue = fio_devdax_queue, 334 .open_file = fio_devdax_open_file, 335 .close_file = fio_devdax_close_file, 336 .get_file_size = fio_devdax_get_file_size, 337 .flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NOEXTEND | FIO_NODISKUTIL, 338 }; 339 340 static void fio_init fio_devdax_register(void) 341 { 342 register_ioengine(&ioengine); 343 } 344 345 static void fio_exit fio_devdax_unregister(void) 346 { 347 unregister_ioengine(&ioengine); 348 } 349