// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 NXP Semiconductors
 * Copyright (C) 2017 Bin Meng <bmeng.cn@gmail.com>
 */

#include <common.h>
#include <dm.h>
#include <errno.h>
#include <memalign.h>
#include <pci.h>
#include <dm/device-internal.h>
#include "nvme.h"

#define NVME_Q_DEPTH		2
#define NVME_AQ_DEPTH		2
#define NVME_SQ_SIZE(depth)	((depth) * sizeof(struct nvme_command))
#define NVME_CQ_SIZE(depth)	((depth) * sizeof(struct nvme_completion))
#define ADMIN_TIMEOUT		60
#define IO_TIMEOUT		30
#define MAX_PRP_POOL		512

enum nvme_queue_id {
	NVME_ADMIN_Q,
	NVME_IO_Q,
	NVME_Q_NUM,
};

/*
 * An NVM Express queue. Each device has at least two (one for admin
 * commands and one for I/O commands).
 */
struct nvme_queue {
	struct nvme_dev *dev;
	struct nvme_command *sq_cmds;
	struct nvme_completion *cqes;
	wait_queue_head_t sq_full;
	u32 __iomem *q_db;
	u16 q_depth;
	s16 cq_vector;
	u16 sq_head;
	u16 sq_tail;
	u16 cq_head;
	u16 qid;
	u8 cq_phase;
	u8 cqe_seen;
	unsigned long cmdid_data[];
};

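/**
 * nvme_wait_ready() - wait for the controller status to match the enable bit
 *
 * Polls CSTS.RDY until it reflects the requested CC.EN state, using the
 * worst-case timeout advertised in CAP.TO (reported in 500 ms units).
 *
 * @dev:	NVMe controller to poll
 * @enabled:	true to wait for ready, false to wait for not-ready
 * @return 0 on success, -ETIME if the controller did not respond in time
 */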
static int nvme_wait_ready(struct nvme_dev *dev, bool enabled)
{
	u32 bit = enabled ? NVME_CSTS_RDY : 0;
	int timeout;
	ulong start;

	/* Timeout field in the CAP register is in 500 millisecond units */
	timeout = NVME_CAP_TIMEOUT(dev->cap) * 500;

	start = get_timer(0);
	while (get_timer(start) < timeout) {
		if ((readl(&dev->bar->csts) & NVME_CSTS_RDY) == bit)
			return 0;
	}

	return -ETIME;
}

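/**
 * nvme_setup_prps() - set up the PRP entries describing a data buffer
 *
 * PRP1 in the command always points at the first (possibly unaligned) page
 * of the transfer. This helper computes PRP2: zero if PRP1 covers the whole
 * buffer, the address of the second page if the remainder fits in one page,
 * or the address of a PRP list otherwise. Each PRP list page holds data
 * pointers plus a final chain entry pointing at the next list page.
 *
 * @dev:	NVMe controller the transfer is for
 * @prp2:	returns the value to place in the command's PRP2 field
 * @total_len:	length of the data buffer in bytes
 * @dma_addr:	bus address of the data buffer
 * @return 0 on success, -ENOMEM if the PRP pool cannot be grown
 */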
static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
			   int total_len, u64 dma_addr)
{
	u32 page_size = dev->page_size;
	u32 prps_per_page = (page_size >> 3) - 1;
	u32 num_pages;
	int offset = dma_addr & (page_size - 1);
	u64 *prp_pool;
	int length = total_len;
	int i, nprps;

	length -= (page_size - offset);

	if (length <= 0) {
		*prp2 = 0;
		return 0;
	}

	if (length)
		dma_addr += (page_size - offset);

	if (length <= page_size) {
		*prp2 = dma_addr;
		return 0;
	}

	nprps = DIV_ROUND_UP(length, page_size);
	num_pages = DIV_ROUND_UP(nprps, prps_per_page);

	if (nprps > dev->prp_entry_num) {
		free(dev->prp_pool);
		/*
		 * Each PRP list page carries prps_per_page data entries plus
		 * one chain entry, so size the pool in whole pages.
		 */
		dev->prp_pool = memalign(page_size, num_pages * page_size);
		if (!dev->prp_pool) {
			printf("Error: malloc prp_pool fail\n");
			return -ENOMEM;
		}
		dev->prp_entry_num = prps_per_page * num_pages;
	}

	prp_pool = dev->prp_pool;
	i = 0;
	while (nprps) {
		if (i == prps_per_page) {
			/* last slot of this page chains to the next list page */
			*(prp_pool + i) = cpu_to_le64((ulong)prp_pool +
					page_size);
			i = 0;
			prp_pool = (u64 *)((ulong)prp_pool + page_size);
		}
		*(prp_pool + i++) = cpu_to_le64(dma_addr);
		dma_addr += page_size;
		nprps--;
	}

	/* the controller fetches the PRP list via DMA, so flush it out */
	flush_dcache_range((ulong)dev->prp_pool,
			   (ulong)prp_pool + (i << 3));

	*prp2 = (ulong)dev->prp_pool;

	return 0;
}

static __le16 nvme_get_cmd_id(void)
{
	static unsigned short cmdid;

	return cpu_to_le16((cmdid < USHRT_MAX) ? cmdid++ : 0);
}

static u16 nvme_read_completion_status(struct nvme_queue *nvmeq, u16 index)
{
	u64 start = (ulong)&nvmeq->cqes[index];
	u64 stop = start + sizeof(struct nvme_completion);

	invalidate_dcache_range(start, stop);

	return le16_to_cpu(readw(&(nvmeq->cqes[index].status)));
}

/**
 * nvme_submit_cmd() - copy a command into a queue and ring the doorbell
 *
 * @nvmeq:	The queue to use
 * @cmd:	The command to send
 */
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
	u16 tail = nvmeq->sq_tail;

	memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
	flush_dcache_range((ulong)&nvmeq->sq_cmds[tail],
			   (ulong)&nvmeq->sq_cmds[tail] + sizeof(*cmd));

	if (++tail == nvmeq->q_depth)
		tail = 0;
	writel(tail, nvmeq->q_db);
	nvmeq->sq_tail = tail;
}

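/**
 * nvme_submit_sync_cmd() - submit a command and poll for its completion
 *
 * The controller posts completions with a phase tag that is inverted on
 * every pass through the completion queue, so a new entry is recognised
 * when the phase bit of its status field matches the queue's expected
 * phase. After consuming an entry, the completion queue head doorbell is
 * written and the head/phase bookkeeping is advanced.
 *
 * @nvmeq:	The queue to submit on (admin or I/O)
 * @cmd:	The command to send
 * @result:	optional return of the completion's command-specific result
 * @timeout:	poll limit passed by the caller (ADMIN_TIMEOUT or IO_TIMEOUT)
 * @return 0 on success, -EIO on a failed completion, -ETIMEDOUT on timeout
 */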
static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq,
				struct nvme_command *cmd,
				u32 *result, unsigned timeout)
{
	u16 head = nvmeq->cq_head;
	u16 phase = nvmeq->cq_phase;
	u16 status;
	ulong start_time;
	ulong timeout_us = timeout * 100000;

	cmd->common.command_id = nvme_get_cmd_id();
	nvme_submit_cmd(nvmeq, cmd);

	start_time = timer_get_us();

	for (;;) {
		status = nvme_read_completion_status(nvmeq, head);
		if ((status & 0x01) == phase)
			break;
		if (timeout_us > 0 && (timer_get_us() - start_time)
		    >= timeout_us)
			return -ETIMEDOUT;
	}

	status >>= 1;
	if (status) {
		printf("ERROR: status = %x, phase = %d, head = %d\n",
		       status, phase, head);
		status = 0;
		if (++head == nvmeq->q_depth) {
			head = 0;
			phase = !phase;
		}
		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
		nvmeq->cq_head = head;
		nvmeq->cq_phase = phase;

		return -EIO;
	}

	if (result)
		*result = le32_to_cpu(readl(&(nvmeq->cqes[head].result)));

	if (++head == nvmeq->q_depth) {
		head = 0;
		phase = !phase;
	}
	writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
	nvmeq->cq_head = head;
	nvmeq->cq_phase = phase;

	return status;
}

static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
				 u32 *result)
{
	return nvme_submit_sync_cmd(dev->queues[NVME_ADMIN_Q], cmd,
				    result, ADMIN_TIMEOUT);
}

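/**
 * nvme_alloc_queue() - allocate the in-memory state for one queue pair
 *
 * The submission and completion rings are allocated with 4 KiB alignment
 * so their base addresses can be handed directly to the controller when
 * the queues are created.
 *
 * @dev:	NVMe controller the queue belongs to
 * @qid:	queue index (0 is the admin queue)
 * @depth:	number of entries in each ring
 * @return pointer to the new queue, or NULL on allocation failure
 */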
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev,
					   int qid, int depth)
{
	struct nvme_queue *nvmeq = malloc(sizeof(*nvmeq));
	if (!nvmeq)
		return NULL;
	memset(nvmeq, 0, sizeof(*nvmeq));

	nvmeq->cqes = (void *)memalign(4096, NVME_CQ_SIZE(depth));
	if (!nvmeq->cqes)
		goto free_nvmeq;
	memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(depth));

	nvmeq->sq_cmds = (void *)memalign(4096, NVME_SQ_SIZE(depth));
	if (!nvmeq->sq_cmds)
		goto free_queue;
	memset((void *)nvmeq->sq_cmds, 0, NVME_SQ_SIZE(depth));

	nvmeq->dev = dev;

	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	nvmeq->q_depth = depth;
	nvmeq->qid = qid;
	dev->queue_count++;
	dev->queues[qid] = nvmeq;

	return nvmeq;

 free_queue:
	free((void *)nvmeq->cqes);
 free_nvmeq:
	free(nvmeq);

	return NULL;
}

static int nvme_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.delete_queue.opcode = opcode;
	c.delete_queue.qid = cpu_to_le16(id);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_delete_sq(struct nvme_dev *dev, u16 sqid)
{
	return nvme_delete_queue(dev, nvme_admin_delete_sq, sqid);
}

static int nvme_delete_cq(struct nvme_dev *dev, u16 cqid)
{
	return nvme_delete_queue(dev, nvme_admin_delete_cq, cqid);
}

static int nvme_enable_ctrl(struct nvme_dev *dev)
{
	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
	dev->ctrl_config |= NVME_CC_ENABLE;
	writel(cpu_to_le32(dev->ctrl_config), &dev->bar->cc);

	return nvme_wait_ready(dev, true);
}

static int nvme_disable_ctrl(struct nvme_dev *dev)
{
	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
	dev->ctrl_config &= ~NVME_CC_ENABLE;
	writel(cpu_to_le32(dev->ctrl_config), &dev->bar->cc);

	return nvme_wait_ready(dev, false);
}

static void nvme_free_queue(struct nvme_queue *nvmeq)
{
	free((void *)nvmeq->cqes);
	free(nvmeq->sq_cmds);
	free(nvmeq);
}

static void nvme_free_queues(struct nvme_dev *dev, int lowest)
{
	int i;

	for (i = dev->queue_count - 1; i >= lowest; i--) {
		struct nvme_queue *nvmeq = dev->queues[i];
		dev->queue_count--;
		dev->queues[i] = NULL;
		nvme_free_queue(nvmeq);
	}
}

static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
	struct nvme_dev *dev = nvmeq->dev;

	nvmeq->sq_tail = 0;
	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(nvmeq->q_depth));
	flush_dcache_range((ulong)nvmeq->cqes,
			   (ulong)nvmeq->cqes + NVME_CQ_SIZE(nvmeq->q_depth));
	dev->online_queues++;
}

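/**
 * nvme_configure_admin_queue() - reset the controller and set up the admin queue
 *
 * Negotiates the memory page size against CAP.MPSMIN/MPSMAX, disables the
 * controller, programs AQA with the (zero-based) admin queue sizes and
 * ASQ/ACQ with the ring base addresses, then re-enables the controller
 * with the chosen page size, arbitration and queue entry sizes in CC.
 *
 * @dev:	NVMe controller to initialise
 * @return 0 on success, negative error code otherwise
 */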
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
	int result;
	u32 aqa;
	u64 cap = dev->cap;
	struct nvme_queue *nvmeq;
	/* most architectures use 4KB as the page size */
	unsigned page_shift = 12;
	unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
	unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;

	if (page_shift < dev_page_min) {
		debug("Device minimum page size (%u) too large for host (%u)\n",
		      1 << dev_page_min, 1 << page_shift);
		return -ENODEV;
	}

	if (page_shift > dev_page_max) {
		debug("Device maximum page size (%u) smaller than host (%u)\n",
		      1 << dev_page_max, 1 << page_shift);
		page_shift = dev_page_max;
	}

	result = nvme_disable_ctrl(dev);
	if (result < 0)
		return result;

	nvmeq = dev->queues[NVME_ADMIN_Q];
	if (!nvmeq) {
		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
		if (!nvmeq)
			return -ENOMEM;
	}

	aqa = nvmeq->q_depth - 1;
	aqa |= aqa << 16;

	dev->page_size = 1 << page_shift;

	dev->ctrl_config = NVME_CC_CSS_NVM;
	dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;

	writel(aqa, &dev->bar->aqa);
	nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
	nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);

	result = nvme_enable_ctrl(dev);
	if (result)
		goto free_nvmeq;

	nvmeq->cq_vector = 0;

	nvme_init_queue(dev->queues[NVME_ADMIN_Q], 0);

	return result;

 free_nvmeq:
	nvme_free_queues(dev, 0);

	return result;
}

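/*
 * I/O queues are created with admin commands: the completion queue must be
 * registered first so the paired submission queue can reference its CQ ID.
 * Both rings are physically contiguous, hence NVME_QUEUE_PHYS_CONTIG.
 */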
static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
			    struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;

	memset(&c, 0, sizeof(c));
	c.create_cq.opcode = nvme_admin_create_cq;
	c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
	c.create_cq.cqid = cpu_to_le16(qid);
	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_cq.cq_flags = cpu_to_le16(flags);
	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
			    struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;

	memset(&c, 0, sizeof(c));
	c.create_sq.opcode = nvme_admin_create_sq;
	c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
	c.create_sq.sqid = cpu_to_le16(qid);
	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_sq.sq_flags = cpu_to_le16(flags);
	c.create_sq.cqid = cpu_to_le16(qid);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

int nvme_identify(struct nvme_dev *dev, unsigned nsid,
		  unsigned cns, dma_addr_t dma_addr)
{
	struct nvme_command c;
	u32 page_size = dev->page_size;
	int offset = dma_addr & (page_size - 1);
	int length = sizeof(struct nvme_id_ctrl);
	int ret;

	memset(&c, 0, sizeof(c));
	c.identify.opcode = nvme_admin_identify;
	c.identify.nsid = cpu_to_le32(nsid);
	c.identify.prp1 = cpu_to_le64(dma_addr);

	length -= (page_size - offset);
	if (length <= 0)
		c.identify.prp2 = 0;
	else
		c.identify.prp2 = cpu_to_le64(dma_addr + (page_size - offset));

	c.identify.cns = cpu_to_le32(cns);

	ret = nvme_submit_admin_cmd(dev, &c, NULL);
	if (!ret)
		invalidate_dcache_range(dma_addr,
					dma_addr + sizeof(struct nvme_id_ctrl));

	return ret;
}

int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
		      dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_get_features;
	c.features.nsid = cpu_to_le32(nsid);
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);

	/*
	 * TODO: add cache invalidate operation when the size of
	 * the DMA buffer is known
	 */

	return nvme_submit_admin_cmd(dev, &c, result);
}

int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
		      dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_set_features;
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);
	c.features.dword11 = cpu_to_le32(dword11);

	/*
	 * TODO: add cache flush operation when the size of
	 * the DMA buffer is known
	 */

	return nvme_submit_admin_cmd(dev, &c, result);
}

static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
{
	struct nvme_dev *dev = nvmeq->dev;
	int result;

	nvmeq->cq_vector = qid - 1;
	result = nvme_alloc_cq(dev, qid, nvmeq);
	if (result < 0)
		goto release_cq;

	result = nvme_alloc_sq(dev, qid, nvmeq);
	if (result < 0)
		goto release_sq;

	nvme_init_queue(nvmeq, qid);

	return result;

 release_sq:
	nvme_delete_sq(dev, qid);
 release_cq:
	nvme_delete_cq(dev, qid);

	return result;
}

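/*
 * The Number of Queues feature takes zero-based submission/completion queue
 * counts in the lower and upper halves of dword11, and the controller returns
 * the counts it actually allocated in the same format. The smaller of the two,
 * converted back to a one-based value, is the number of usable I/O queues.
 */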
static int nvme_set_queue_count(struct nvme_dev *dev, int count)
{
	int status;
	u32 result;
	u32 q_count = (count - 1) | ((count - 1) << 16);

	status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES,
			q_count, 0, &result);

	if (status < 0)
		return status;
	if (status > 1)
		return 0;

	return min(result & 0xffff, result >> 16) + 1;
}

static void nvme_create_io_queues(struct nvme_dev *dev)
{
	unsigned int i;

	for (i = dev->queue_count; i <= dev->max_qid; i++)
		if (!nvme_alloc_queue(dev, i, dev->q_depth))
			break;

	for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
		if (nvme_create_queue(dev->queues[i], i))
			break;
}

static int nvme_setup_io_queues(struct nvme_dev *dev)
{
	int nr_io_queues;
	int result;

	nr_io_queues = 1;
	result = nvme_set_queue_count(dev, nr_io_queues);
	if (result <= 0)
		return result;

	dev->max_qid = nr_io_queues;

	/* Free previously allocated queues */
	nvme_free_queues(dev, nr_io_queues + 1);
	nvme_create_io_queues(dev);

	return 0;
}

static int nvme_get_info_from_identify(struct nvme_dev *dev)
{
	ALLOC_CACHE_ALIGN_BUFFER(char, buf, sizeof(struct nvme_id_ctrl));
	struct nvme_id_ctrl *ctrl = (struct nvme_id_ctrl *)buf;
	int ret;
	int shift = NVME_CAP_MPSMIN(dev->cap) + 12;

	ret = nvme_identify(dev, 0, 1, (dma_addr_t)ctrl);
	if (ret)
		return -EIO;

	dev->nn = le32_to_cpu(ctrl->nn);
	dev->vwc = ctrl->vwc;
	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
	if (ctrl->mdts) {
		dev->max_transfer_shift = ctrl->mdts + shift;
	} else {
		/*
		 * The Maximum Data Transfer Size (MDTS) field indicates the
		 * maximum data transfer size between the host and the
		 * controller. The host should not submit a command that
		 * exceeds this transfer size. The value is in units of the
		 * minimum memory page size and is reported as a power of
		 * two (2^n).
		 *
		 * The spec also says: a value of 0h indicates no restrictions
		 * on transfer size. But in nvme_blk_read/write() below we have
		 * the following algorithm for the maximum number of logical
		 * blocks per transfer:
		 *
		 * u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
		 *
		 * In order for lbas not to overflow, the maximum shift
		 * difference is 15, which means dev->max_transfer_shift may
		 * not exceed 15 + ns->lba_shift (9 for 512-byte blocks).
		 * Let's use 20, which provides a 1 MB transfer size.
		 */
		dev->max_transfer_shift = 20;
	}

	return 0;
}

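/*
 * Probe every controller registered in the NVMe uclass so that their block
 * devices (one per namespace) are created and become visible to the generic
 * block layer. In U-Boot this is typically triggered by the 'nvme scan'
 * shell command.
 */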
int nvme_scan_namespace(void)
{
	struct uclass *uc;
	struct udevice *dev;
	int ret;

	ret = uclass_get(UCLASS_NVME, &uc);
	if (ret)
		return ret;

	uclass_foreach_dev(dev, uc) {
		ret = device_probe(dev);
		if (ret)
			return ret;
	}

	return 0;
}

static int nvme_blk_probe(struct udevice *udev)
{
	struct nvme_dev *ndev = dev_get_priv(udev->parent);
	struct blk_desc *desc = dev_get_uclass_platdata(udev);
	struct nvme_ns *ns = dev_get_priv(udev);
	u8 flbas;
	ALLOC_CACHE_ALIGN_BUFFER(char, buf, sizeof(struct nvme_id_ns));
	struct nvme_id_ns *id = (struct nvme_id_ns *)buf;
	struct pci_child_platdata *pplat;

	memset(ns, 0, sizeof(*ns));
	ns->dev = ndev;
	/* extract the namespace id from the block device name */
	ns->ns_id = trailing_strtol(udev->name) + 1;
	if (nvme_identify(ndev, ns->ns_id, 0, (dma_addr_t)id))
		return -EIO;

	flbas = id->flbas & NVME_NS_FLBAS_LBA_MASK;
	ns->flbas = flbas;
	ns->lba_shift = id->lbaf[flbas].ds;
	ns->mode_select_num_blocks = le64_to_cpu(id->nsze);
	ns->mode_select_block_len = 1 << ns->lba_shift;
	list_add(&ns->list, &ndev->namespaces);

	desc->lba = ns->mode_select_num_blocks;
	desc->log2blksz = ns->lba_shift;
	desc->blksz = 1 << ns->lba_shift;
	desc->bdev = udev;
	pplat = dev_get_parent_platdata(udev->parent);
	sprintf(desc->vendor, "0x%.4x", pplat->vendor);
	memcpy(desc->product, ndev->serial, sizeof(ndev->serial));
	memcpy(desc->revision, ndev->firmware_rev, sizeof(ndev->firmware_rev));
	part_init(desc);

	return 0;
}

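/**
 * nvme_blk_rw() - common read/write path for the block interface
 *
 * Transfers are split into chunks of at most
 * 1 << (dev->max_transfer_shift - ns->lba_shift) blocks so each NVMe
 * read/write command stays within the controller's transfer limit. The data
 * buffer is flushed from the data cache before writes and invalidated after
 * reads, and the PRP entries for each chunk come from nvme_setup_prps().
 *
 * @udev:	block device being accessed
 * @blknr:	first logical block of the transfer
 * @blkcnt:	number of logical blocks to transfer
 * @buffer:	data buffer to read into or write from
 * @read:	true for a read, false for a write
 * @return the number of blocks actually transferred
 */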
static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr,
			 lbaint_t blkcnt, void *buffer, bool read)
{
	struct nvme_ns *ns = dev_get_priv(udev);
	struct nvme_dev *dev = ns->dev;
	struct nvme_command c;
	struct blk_desc *desc = dev_get_uclass_platdata(udev);
	int status;
	u64 prp2;
	u64 total_len = blkcnt << desc->log2blksz;
	u64 temp_len = total_len;
	ulong temp_buffer = (ulong)buffer;

	u64 slba = blknr;
	u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
	u64 total_lbas = blkcnt;

	if (!read)
		flush_dcache_range((unsigned long)buffer,
				   (unsigned long)buffer + total_len);

	c.rw.opcode = read ? nvme_cmd_read : nvme_cmd_write;
	c.rw.flags = 0;
	c.rw.nsid = cpu_to_le32(ns->ns_id);
	c.rw.control = 0;
	c.rw.dsmgmt = 0;
	c.rw.reftag = 0;
	c.rw.apptag = 0;
	c.rw.appmask = 0;
	c.rw.metadata = 0;

	while (total_lbas) {
		if (total_lbas < lbas) {
			lbas = (u16)total_lbas;
			total_lbas = 0;
		} else {
			total_lbas -= lbas;
		}

		if (nvme_setup_prps(dev, &prp2,
				    lbas << ns->lba_shift, temp_buffer))
			return -EIO;
		c.rw.slba = cpu_to_le64(slba);
		slba += lbas;
		c.rw.length = cpu_to_le16(lbas - 1);
		c.rw.prp1 = cpu_to_le64(temp_buffer);
		c.rw.prp2 = cpu_to_le64(prp2);
		status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
				&c, NULL, IO_TIMEOUT);
		if (status)
			break;
		temp_len -= (u32)lbas << ns->lba_shift;
		temp_buffer += lbas << ns->lba_shift;
	}

	/* invalidate over the original buffer so the CPU sees the DMAed data */
	if (read)
		invalidate_dcache_range((unsigned long)buffer,
					(unsigned long)buffer + total_len);

	return (total_len - temp_len) >> desc->log2blksz;
}

static ulong nvme_blk_read(struct udevice *udev, lbaint_t blknr,
			   lbaint_t blkcnt, void *buffer)
{
	return nvme_blk_rw(udev, blknr, blkcnt, buffer, true);
}

static ulong nvme_blk_write(struct udevice *udev, lbaint_t blknr,
			    lbaint_t blkcnt, const void *buffer)
{
	return nvme_blk_rw(udev, blknr, blkcnt, (void *)buffer, false);
}

static const struct blk_ops nvme_blk_ops = {
	.read	= nvme_blk_read,
	.write	= nvme_blk_write,
};

U_BOOT_DRIVER(nvme_blk) = {
	.name	= "nvme-blk",
	.id	= UCLASS_BLK,
	.probe	= nvme_blk_probe,
	.ops	= &nvme_blk_ops,
	.priv_auto_alloc_size = sizeof(struct nvme_ns),
};

static int nvme_bind(struct udevice *udev)
{
	static int ndev_num;
	char name[20];

	sprintf(name, "nvme#%d", ndev_num++);

	return device_set_name(udev, name);
}

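/**
 * nvme_probe() - bring up one NVMe controller
 *
 * Maps BAR0, sanity-checks that the register space responds, reads CAP to
 * derive the queue depth and doorbell stride, allocates the queue pointer
 * array and the PRP pool, configures the admin queue, creates the single
 * I/O queue pair used by this driver, and finally caches the controller's
 * identify data.
 *
 * @udev:	PCI device bound to this driver
 * @return 0 on success, negative error code otherwise
 */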
static int nvme_probe(struct udevice *udev)
{
	int ret;
	struct nvme_dev *ndev = dev_get_priv(udev);

	ndev->instance = trailing_strtol(udev->name);

	INIT_LIST_HEAD(&ndev->namespaces);
	ndev->bar = dm_pci_map_bar(udev, PCI_BASE_ADDRESS_0,
			PCI_REGION_MEM);
	if (readl(&ndev->bar->csts) == -1) {
		ret = -ENODEV;
		printf("Error: %s: Controller registers not accessible\n",
		       udev->name);
		goto free_nvme;
	}

	ndev->queues = malloc(NVME_Q_NUM * sizeof(struct nvme_queue *));
	if (!ndev->queues) {
		ret = -ENOMEM;
		printf("Error: %s: Out of memory!\n", udev->name);
		goto free_nvme;
	}
	memset(ndev->queues, 0, NVME_Q_NUM * sizeof(struct nvme_queue *));

	ndev->prp_pool = malloc(MAX_PRP_POOL);
	if (!ndev->prp_pool) {
		ret = -ENOMEM;
		printf("Error: %s: Out of memory!\n", udev->name);
		goto free_nvme;
	}
	ndev->prp_entry_num = MAX_PRP_POOL >> 3;

	ndev->cap = nvme_readq(&ndev->bar->cap);
	ndev->q_depth = min_t(int, NVME_CAP_MQES(ndev->cap) + 1, NVME_Q_DEPTH);
	ndev->db_stride = 1 << NVME_CAP_STRIDE(ndev->cap);
	ndev->dbs = ((void __iomem *)ndev->bar) + 4096;

	ret = nvme_configure_admin_queue(ndev);
	if (ret)
		goto free_queue;

	ret = nvme_setup_io_queues(ndev);
	if (ret)
		goto free_queue;

	nvme_get_info_from_identify(ndev);

	return 0;

free_queue:
	free((void *)ndev->queues);
free_nvme:
	return ret;
}

U_BOOT_DRIVER(nvme) = {
	.name	= "nvme",
	.id	= UCLASS_NVME,
	.bind	= nvme_bind,
	.probe	= nvme_probe,
	.priv_auto_alloc_size = sizeof(struct nvme_dev),
};

struct pci_device_id nvme_supported[] = {
	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, ~0) },
	{}
};

U_BOOT_PCI_DEVICE(nvme, nvme_supported);