Home | History | Annotate | Download | only in engines
      1 /*
      2  * device DAX engine
      3  *
      4  * IO engine that reads/writes from files by doing memcpy to/from
      5  * a memory mapped region of DAX enabled device.
      6  *
      7  * Copyright (C) 2016 Intel Corp
      8  *
      9  * This program is free software; you can redistribute it and/or
     10  * modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.
     12  *
     13  * This program is distributed in the hope that it will be useful,
     14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     16  * GNU General Public License for more details.
     17  *
     18  */
     19 
     20 /*
     21  * device dax engine
 * IO engine that accesses a DAX device directly to read and write data
     23  *
     24  * To use:
     25  *   ioengine=dev-dax
     26  *
     27  *   Other relevant settings:
     28  *     iodepth=1
     29  *     direct=0	   REQUIRED
     30  *     filename=/dev/daxN.N
     31  *     bs=2m
     32  *
 *     direct should be left at 0. Using dev-dax implies that memory access
 *     is direct. However, dev-dax does not support the O_DIRECT flag by
 *     design, since it is not necessary.
     36  *
 *     bs should, at a minimum, adhere to the device dax alignment.
     38  *
     39  * libpmem.so
     40  *   By default, the dev-dax engine will let the system find the libpmem.so
     41  *   that it uses. You can use an alternative libpmem by setting the
     42  *   FIO_PMEM_LIB environment variable to the full path to the desired
     43  *   libpmem.so.
     44  */
     45 
     46 #include <stdio.h>
     47 #include <limits.h>
     48 #include <stdlib.h>
     49 #include <unistd.h>
     50 #include <errno.h>
     51 #include <sys/mman.h>
     52 #include <sys/stat.h>
     53 #include <sys/sysmacros.h>
     54 #include <libgen.h>
     55 #include <libpmem.h>
     56 
     57 #include "../fio.h"
     58 #include "../verify.h"
     59 
/*
 * Upper bound (1 GiB) on the size of a partial mapping, modelled after
 * the mmap engine's behavior. fio_devdax_prep_limited() clamps the
 * window it maps to this value.
 */
#define MMAP_TOTAL_SZ	(1 * 1024 * 1024 * 1024UL)
     65 
/*
 * Per-file engine state, allocated in fio_devdax_open_file() and attached
 * to the file with FILE_SET_ENG_DATA(). Describes the current mapping.
 */
struct fio_devdax_data {
	void *devdax_ptr;	/* address returned by mmap(), NULL if unmapped */
	size_t devdax_sz;	/* length passed to mmap() for this mapping */
	off_t devdax_off;	/* device offset passed to mmap() */
};
     71 
     72 static int fio_devdax_file(struct thread_data *td, struct fio_file *f,
     73 			   size_t length, off_t off)
     74 {
     75 	struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
     76 	int flags = 0;
     77 
     78 	if (td_rw(td))
     79 		flags = PROT_READ | PROT_WRITE;
     80 	else if (td_write(td)) {
     81 		flags = PROT_WRITE;
     82 
     83 		if (td->o.verify != VERIFY_NONE)
     84 			flags |= PROT_READ;
     85 	} else
     86 		flags = PROT_READ;
     87 
     88 	fdd->devdax_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off);
     89 	if (fdd->devdax_ptr == MAP_FAILED) {
     90 		fdd->devdax_ptr = NULL;
     91 		td_verror(td, errno, "mmap");
     92 	}
     93 
     94 	if (td->error && fdd->devdax_ptr)
     95 		munmap(fdd->devdax_ptr, length);
     96 
     97 	return td->error;
     98 }
     99 
    100 /*
    101  * Just mmap an appropriate portion, we cannot mmap the full extent
    102  */
    103 static int fio_devdax_prep_limited(struct thread_data *td, struct io_u *io_u)
    104 {
    105 	struct fio_file *f = io_u->file;
    106 	struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
    107 
    108 	if (io_u->buflen > f->real_file_size) {
    109 		log_err("dev-dax: bs too big for dev-dax engine\n");
    110 		return EIO;
    111 	}
    112 
    113 	fdd->devdax_sz = min(MMAP_TOTAL_SZ, f->real_file_size);
    114 	if (fdd->devdax_sz > f->io_size)
    115 		fdd->devdax_sz = f->io_size;
    116 
    117 	fdd->devdax_off = io_u->offset;
    118 
    119 	return fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off);
    120 }
    121 
    122 /*
    123  * Attempt to mmap the entire file
    124  */
    125 static int fio_devdax_prep_full(struct thread_data *td, struct io_u *io_u)
    126 {
    127 	struct fio_file *f = io_u->file;
    128 	struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
    129 	int ret;
    130 
    131 	if (fio_file_partial_mmap(f))
    132 		return EINVAL;
    133 
    134 	if (io_u->offset != (size_t) io_u->offset ||
    135 	    f->io_size != (size_t) f->io_size) {
    136 		fio_file_set_partial_mmap(f);
    137 		return EINVAL;
    138 	}
    139 
    140 	fdd->devdax_sz = f->io_size;
    141 	fdd->devdax_off = 0;
    142 
    143 	ret = fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off);
    144 	if (ret)
    145 		fio_file_set_partial_mmap(f);
    146 
    147 	return ret;
    148 }
    149 
    150 static int fio_devdax_prep(struct thread_data *td, struct io_u *io_u)
    151 {
    152 	struct fio_file *f = io_u->file;
    153 	struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
    154 	int ret;
    155 
    156 	/*
    157 	 * It fits within existing mapping, use it
    158 	 */
    159 	if (io_u->offset >= fdd->devdax_off &&
    160 	    io_u->offset + io_u->buflen < fdd->devdax_off + fdd->devdax_sz)
    161 		goto done;
    162 
    163 	/*
    164 	 * unmap any existing mapping
    165 	 */
    166 	if (fdd->devdax_ptr) {
    167 		if (munmap(fdd->devdax_ptr, fdd->devdax_sz) < 0)
    168 			return errno;
    169 		fdd->devdax_ptr = NULL;
    170 	}
    171 
    172 	if (fio_devdax_prep_full(td, io_u)) {
    173 		td_clear_error(td);
    174 		ret = fio_devdax_prep_limited(td, io_u);
    175 		if (ret)
    176 			return ret;
    177 	}
    178 
    179 done:
    180 	io_u->mmap_data = fdd->devdax_ptr + io_u->offset - fdd->devdax_off -
    181 				f->file_offset;
    182 	return 0;
    183 }
    184 
    185 static int fio_devdax_queue(struct thread_data *td, struct io_u *io_u)
    186 {
    187 	fio_ro_check(td, io_u);
    188 	io_u->error = 0;
    189 
    190 	switch (io_u->ddir) {
    191 	case DDIR_READ:
    192 		memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen);
    193 		break;
    194 	case DDIR_WRITE:
    195 		pmem_memcpy_persist(io_u->mmap_data, io_u->xfer_buf,
    196 				    io_u->xfer_buflen);
    197 		break;
    198 	case DDIR_SYNC:
    199 	case DDIR_DATASYNC:
    200 	case DDIR_SYNC_FILE_RANGE:
    201 		break;
    202 	default:
    203 		io_u->error = EINVAL;
    204 		break;
    205 	}
    206 
    207 	return FIO_Q_COMPLETED;
    208 }
    209 
    210 static int fio_devdax_init(struct thread_data *td)
    211 {
    212 	struct thread_options *o = &td->o;
    213 
    214 	if ((o->rw_min_bs & page_mask) &&
    215 	    (o->fsync_blocks || o->fdatasync_blocks)) {
    216 		log_err("dev-dax: mmap options dictate a minimum block size of %llu bytes\n",
    217 			(unsigned long long) page_size);
    218 		return 1;
    219 	}
    220 
    221 	return 0;
    222 }
    223 
    224 static int fio_devdax_open_file(struct thread_data *td, struct fio_file *f)
    225 {
    226 	struct fio_devdax_data *fdd;
    227 	int ret;
    228 
    229 	ret = generic_open_file(td, f);
    230 	if (ret)
    231 		return ret;
    232 
    233 	fdd = calloc(1, sizeof(*fdd));
    234 	if (!fdd) {
    235 		int fio_unused __ret;
    236 		__ret = generic_close_file(td, f);
    237 		return 1;
    238 	}
    239 
    240 	FILE_SET_ENG_DATA(f, fdd);
    241 
    242 	return 0;
    243 }
    244 
    245 static int fio_devdax_close_file(struct thread_data *td, struct fio_file *f)
    246 {
    247 	struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
    248 
    249 	FILE_SET_ENG_DATA(f, NULL);
    250 	free(fdd);
    251 	fio_file_clear_partial_mmap(f);
    252 
    253 	return generic_close_file(td, f);
    254 }
    255 
    256 static int
    257 fio_devdax_get_file_size(struct thread_data *td, struct fio_file *f)
    258 {
    259 	char spath[PATH_MAX];
    260 	char npath[PATH_MAX];
    261 	char *rpath;
    262 	FILE *sfile;
    263 	uint64_t size;
    264 	struct stat st;
    265 	int rc;
    266 
    267 	if (fio_file_size_known(f))
    268 		return 0;
    269 
    270 	if (f->filetype != FIO_TYPE_CHAR)
    271 		return -EINVAL;
    272 
    273 	rc = stat(f->file_name, &st);
    274 	if (rc < 0) {
    275 		log_err("%s: failed to stat file %s (%s)\n",
    276 			td->o.name, f->file_name, strerror(errno));
    277 		return -errno;
    278 	}
    279 
    280 	snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/subsystem",
    281 		 major(st.st_rdev), minor(st.st_rdev));
    282 
    283 	rpath = realpath(spath, npath);
    284 	if (!rpath) {
    285 		log_err("%s: realpath on %s failed (%s)\n",
    286 			td->o.name, spath, strerror(errno));
    287 		return -errno;
    288 	}
    289 
    290 	/* check if DAX device */
    291 	if (strcmp("/sys/class/dax", rpath)) {
    292 		log_err("%s: %s not a DAX device!\n",
    293 			td->o.name, f->file_name);
    294 	}
    295 
    296 	snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/size",
    297 		 major(st.st_rdev), minor(st.st_rdev));
    298 
    299 	sfile = fopen(spath, "r");
    300 	if (!sfile) {
    301 		log_err("%s: fopen on %s failed (%s)\n",
    302 			td->o.name, spath, strerror(errno));
    303 		return 1;
    304 	}
    305 
    306 	rc = fscanf(sfile, "%lu", &size);
    307 	if (rc < 0) {
    308 		log_err("%s: fscanf on %s failed (%s)\n",
    309 			td->o.name, spath, strerror(errno));
    310 		return 1;
    311 	}
    312 
    313 	f->real_file_size = size;
    314 
    315 	fclose(sfile);
    316 
    317 	if (f->file_offset > f->real_file_size) {
    318 		log_err("%s: offset extends end (%llu > %llu)\n", td->o.name,
    319 					(unsigned long long) f->file_offset,
    320 					(unsigned long long) f->real_file_size);
    321 		return 1;
    322 	}
    323 
    324 	fio_file_set_size_known(f);
    325 	return 0;
    326 }
    327 
/*
 * Operations table for the "dev-dax" engine. Each hook points at the
 * fio_devdax_* function of the same purpose defined in this file.
 * NOTE(review): the meaning of the individual FIO_* flags is defined by
 * the fio core and is not visible here; check the ioengine header before
 * changing them.
 */
static struct ioengine_ops ioengine = {
	.name		= "dev-dax",
	.version	= FIO_IOOPS_VERSION,
	.init		= fio_devdax_init,
	.prep		= fio_devdax_prep,
	.queue		= fio_devdax_queue,
	.open_file	= fio_devdax_open_file,
	.close_file	= fio_devdax_close_file,
	.get_file_size	= fio_devdax_get_file_size,
	.flags		= FIO_SYNCIO | FIO_DISKLESSIO | FIO_NOEXTEND | FIO_NODISKUTIL,
};
    339 
/*
 * Hand the dev-dax ops table to register_ioengine().
 * NOTE(review): fio_init is presumably a constructor-style attribute that
 * makes this run automatically at load time; confirm in the fio headers.
 */
static void fio_init fio_devdax_register(void)
{
	register_ioengine(&ioengine);
}
    344 
/*
 * Remove the dev-dax ops table again via unregister_ioengine().
 * NOTE(review): fio_exit is presumably the destructor-style counterpart
 * of fio_init; confirm in the fio headers.
 */
static void fio_exit fio_devdax_unregister(void)
{
	unregister_ioengine(&ioengine);
}
    349