Diffstat (limited to 'drivers/nvme/host/pci.c')
-rw-r--r--  drivers/nvme/host/pci.c  697
1 files changed, 697 insertions, 0 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
new file mode 100644
index 0000000000..387bc45a7b
--- /dev/null
+++ b/drivers/nvme/host/pci.c
@@ -0,0 +1,697 @@
+
+#include <common.h>
+#include <init.h>
+#include <io.h>
+#include <io-64-nonatomic-lo-hi.h>
+#include <linux/pci.h>
+
+#include <dma.h>
+
+#include "nvme.h"
+
+#define SQ_SIZE(depth) ((depth) * sizeof(struct nvme_command))
+#define CQ_SIZE(depth) ((depth) * sizeof(struct nvme_completion))
+
+#define NVME_MAX_KB_SZ 4096
+
+static int io_queue_depth = 2;
+
+struct nvme_dev;
+
+/*
+ * An NVM Express queue. Each device has at least two (one for admin
+ * commands and one for I/O commands).
+ */
+struct nvme_queue {
+ struct nvme_dev *dev;
+ struct nvme_request *req;
+ struct nvme_command *sq_cmds;
+ volatile struct nvme_completion *cqes;
+ dma_addr_t sq_dma_addr;
+ dma_addr_t cq_dma_addr;
+ u32 __iomem *q_db;
+ u16 q_depth;
+ u16 sq_tail;
+ u16 cq_head;
+ u16 qid;
+ u8 cq_phase;
+
+ u16 counter;
+};
+
+/*
+ * Represents an NVM Express device. Each nvme_dev is a PCI function.
+ */
+struct nvme_dev {
+ struct nvme_queue queues[NVME_QID_NUM];
+ u32 __iomem *dbs;
+ struct device_d *dev;
+ unsigned online_queues;
+ unsigned max_qid;
+ int q_depth;
+ u32 db_stride;
+ void __iomem *bar;
+ bool subsystem;
+ struct nvme_ctrl ctrl;
+ __le64 *prp_pool;
+ unsigned int prp_pool_size;
+ dma_addr_t prp_dma;
+};
+
+static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
+{
+ return container_of(ctrl, struct nvme_dev, ctrl);
+}
+
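+/*
+ * Build the PRP entries describing a data buffer: prp1 always points at
+ * the first page. prp2 is left zero when the transfer ends within that
+ * page, points at the second page when the transfer ends there, and
+ * otherwise points at a PRP list allocated from a coherent pool, one
+ * entry per page, chained whenever a list page fills up.
+ */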
+static int nvme_pci_setup_prps(struct nvme_dev *dev,
+ const struct nvme_request *req,
+ struct nvme_rw_command *cmnd)
+{
+ int length = req->buffer_len;
+ const int page_size = dev->ctrl.page_size;
+ dma_addr_t dma_addr = req->buffer_dma_addr;
+ u32 offset = dma_addr & (page_size - 1);
+ u64 prp1 = dma_addr;
+ __le64 *prp_list;
+ int i, nprps;
+ dma_addr_t prp_dma;
+
+ length -= (page_size - offset);
+ if (length <= 0) {
+ prp_dma = 0;
+ goto done;
+ }
+
+ dma_addr += (page_size - offset);
+
+ if (length <= page_size) {
+ prp_dma = dma_addr;
+ goto done;
+ }
+
+ nprps = DIV_ROUND_UP(length, page_size);
+ if (nprps > dev->prp_pool_size) {
+ dma_free_coherent(dev->prp_pool, dev->prp_dma,
+ dev->prp_pool_size * sizeof(u64));
+ dev->prp_pool_size = nprps;
+ dev->prp_pool = dma_alloc_coherent(nprps * sizeof(u64),
+ &dev->prp_dma);
+ }
+
+ prp_list = dev->prp_pool;
+ prp_dma = dev->prp_dma;
+
+ i = 0;
+ for (;;) {
+ if (i == page_size >> 3) {
+ __le64 *old_prp_list = prp_list;
+ prp_list = &prp_list[i];
+ prp_dma += page_size;
+ prp_list[0] = old_prp_list[i - 1];
+ old_prp_list[i - 1] = cpu_to_le64(prp_dma);
+ i = 1;
+ }
+
+ prp_list[i++] = cpu_to_le64(dma_addr);
+ dma_addr += page_size;
+ length -= page_size;
+ if (length <= 0)
+ break;
+ }
+
+done:
+ cmnd->dptr.prp1 = cpu_to_le64(prp1);
+ cmnd->dptr.prp2 = cpu_to_le64(prp_dma);
+
+ return 0;
+}
+
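+/*
+ * Map the request buffer for DMA and fill in the command's data pointer.
+ * Commands without a payload need no mapping.
+ */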
+static int nvme_map_data(struct nvme_dev *dev, struct nvme_request *req)
+{
+ if (!req->buffer || !req->buffer_len)
+ return 0;
+
+ req->buffer_dma_addr = dma_map_single(dev->dev, req->buffer,
+ req->buffer_len, req->dma_dir);
+ if (dma_mapping_error(dev->dev, req->buffer_dma_addr))
+ return -EFAULT;
+
+ return nvme_pci_setup_prps(dev, req, &req->cmd->rw);
+}
+
+static void nvme_unmap_data(struct nvme_dev *dev, struct nvme_request *req)
+{
+ if (!req->buffer || !req->buffer_len)
+ return;
+
+ dma_unmap_single(dev->dev, req->buffer_dma_addr, req->buffer_len,
+ req->dma_dir);
+}
+
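+/*
+ * Allocate coherent memory for the submission and completion rings of
+ * queue @qid and hook up its doorbell. Queues that already exist are
+ * left untouched.
+ */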
+static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
+{
+ struct nvme_queue *nvmeq = &dev->queues[qid];
+
+ if (dev->ctrl.queue_count > qid)
+ return 0;
+
+ nvmeq->cqes = dma_alloc_coherent(CQ_SIZE(depth),
+ &nvmeq->cq_dma_addr);
+ if (!nvmeq->cqes)
+ goto free_nvmeq;
+
+ nvmeq->sq_cmds = dma_alloc_coherent(SQ_SIZE(depth),
+ &nvmeq->sq_dma_addr);
+ if (!nvmeq->sq_cmds)
+ goto free_cqdma;
+
+ nvmeq->dev = dev;
+ nvmeq->cq_head = 0;
+ nvmeq->cq_phase = 1;
+ nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
+ nvmeq->q_depth = depth;
+ nvmeq->qid = qid;
+ dev->ctrl.queue_count++;
+
+ return 0;
+
+ free_cqdma:
+ dma_free_coherent((void *)nvmeq->cqes, nvmeq->cq_dma_addr,
+ CQ_SIZE(depth));
+ free_nvmeq:
+ return -ENOMEM;
+}
+
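+/* Issue a Delete I/O SQ or Delete I/O CQ admin command for queue @id */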
+static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
+{
+ struct nvme_command c;
+
+ memset(&c, 0, sizeof(c));
+ c.delete_queue.opcode = opcode;
+ c.delete_queue.qid = cpu_to_le16(id);
+
+ return nvme_submit_sync_cmd(&dev->ctrl, &c, NULL, 0);
+}
+
+static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
+ struct nvme_queue *nvmeq, s16 vector)
+{
+ struct nvme_command c;
+ int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
+
+ /*
+ * Note: we (ab)use the fact that the prp fields survive if no data
+ * is attached to the request.
+ */
+ memset(&c, 0, sizeof(c));
+ c.create_cq.opcode = nvme_admin_create_cq;
+ c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
+ c.create_cq.cqid = cpu_to_le16(qid);
+ c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
+ c.create_cq.cq_flags = cpu_to_le16(flags);
+ c.create_cq.irq_vector = cpu_to_le16(vector);
+
+ return nvme_submit_sync_cmd(&dev->ctrl, &c, NULL, 0);
+}
+
+static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
+ struct nvme_queue *nvmeq)
+{
+ struct nvme_command c;
+ int flags = NVME_QUEUE_PHYS_CONTIG;
+
+ /*
+ * Note: we (ab)use the fact that the prp fields survive if no data
+ * is attached to the request.
+ */
+ memset(&c, 0, sizeof(c));
+ c.create_sq.opcode = nvme_admin_create_sq;
+ c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
+ c.create_sq.sqid = cpu_to_le16(qid);
+ c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
+ c.create_sq.sq_flags = cpu_to_le16(flags);
+ c.create_sq.cqid = cpu_to_le16(qid);
+
+ return nvme_submit_sync_cmd(&dev->ctrl, &c, NULL, 0);
+}
+
+static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
+{
+ return adapter_delete_queue(dev, nvme_admin_delete_cq, cqid);
+}
+
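+/* Reset the driver's view of a queue: tail, head, phase and doorbell */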
+static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
+{
+ struct nvme_dev *dev = nvmeq->dev;
+
+ nvmeq->sq_tail = 0;
+ nvmeq->cq_head = 0;
+ nvmeq->cq_phase = 1;
+ nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
+ dev->online_queues++;
+}
+
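+/*
+ * Create an I/O queue pair on the controller. The completion queue has
+ * to exist before the submission queue that posts to it, so the CQ is
+ * created first and deleted again if creating the SQ fails.
+ */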
+static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
+{
+ struct nvme_dev *dev = nvmeq->dev;
+ int result;
+ s16 vector;
+
+ vector = 0;
+ result = adapter_alloc_cq(dev, qid, nvmeq, vector);
+ if (result)
+ return result;
+
+ result = adapter_alloc_sq(dev, qid, nvmeq);
+ if (result < 0)
+ return result;
+ else if (result)
+ goto release_cq;
+
+ nvme_init_queue(nvmeq, qid);
+
+ return result;
+
+release_cq:
+ adapter_delete_cq(dev, qid);
+ return result;
+}
+
+/**
+ * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
+ * @nvmeq: The queue to use
+ * @cmd: The command to send
+ */
+static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+{
+ memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
+
+ if (++nvmeq->sq_tail == nvmeq->q_depth)
+ nvmeq->sq_tail = 0;
+ writel(nvmeq->sq_tail, nvmeq->q_db);
+}
+
+/* We read the CQE phase first to check if the rest of the entry is valid */
+static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
+{
+ return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
+ nvmeq->cq_phase;
+}
+
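+/* Tell the controller how far we have consumed the completion queue */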
+static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
+{
+ u16 head = nvmeq->cq_head;
+
+ writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+}
+
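+/*
+ * Complete the (single) outstanding request matching this CQE after
+ * sanity checking its command id.
+ */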
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
+{
+ volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
+ struct nvme_request *req = nvmeq->req;
+
+ if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
+ dev_warn(nvmeq->dev->ctrl.dev,
+ "invalid id %d completed on queue %d\n",
+ cqe->command_id, le16_to_cpu(cqe->sq_id));
+ return;
+ }
+
+ if (WARN_ON(cqe->command_id != req->cmd->common.command_id))
+ return;
+
+ nvme_end_request(req, cqe->status, cqe->result);
+}
+
+static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
+{
+ while (start != end) {
+ nvme_handle_cqe(nvmeq, start);
+ if (++start == nvmeq->q_depth)
+ start = 0;
+ }
+}
+
+static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
+{
+ if (++nvmeq->cq_head == nvmeq->q_depth) {
+ nvmeq->cq_head = 0;
+ nvmeq->cq_phase = !nvmeq->cq_phase;
+ }
+}
+
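+/*
+ * Walk the pending CQEs until the entry carrying @tag is found or the
+ * queue runs dry. Returns the consumed range in [@start, @end) and rings
+ * the CQ doorbell if anything was consumed.
+ */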
+static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
+ u16 *end, int tag)
+{
+ bool found = false;
+
+ *start = nvmeq->cq_head;
+ while (!found && nvme_cqe_pending(nvmeq)) {
+ if (nvmeq->cqes[nvmeq->cq_head].command_id == tag)
+ found = true;
+ nvme_update_cq_head(nvmeq);
+ }
+ *end = nvmeq->cq_head;
+
+ if (*start != *end)
+ nvme_ring_cq_doorbell(nvmeq);
+ return found;
+}
+
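+/*
+ * Poll for the completion of the command identified by @tag. This driver
+ * is purely synchronous: completions are reaped by polling, not by
+ * interrupt.
+ */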
+static bool nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
+{
+ u16 start, end;
+ bool found;
+
+ if (!nvme_cqe_pending(nvmeq))
+ return false;
+
+ found = nvme_process_cq(nvmeq, &start, &end, tag);
+
+ nvme_complete_cqes(nvmeq, start, end);
+ return found;
+}
+
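+/*
+ * Submit a single command synchronously: derive the DMA direction from
+ * the opcode, map the data buffer, post the command and poll for its
+ * completion or the timeout.
+ */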
+static int nvme_pci_submit_sync_cmd(struct nvme_ctrl *ctrl,
+ struct nvme_command *cmd,
+ union nvme_result *result,
+ void *buffer,
+ unsigned int buffer_len,
+ unsigned timeout, int qid)
+{
+ struct nvme_dev *dev = to_nvme_dev(ctrl);
+ struct nvme_queue *nvmeq = &dev->queues[qid];
+ struct nvme_request req = { };
+ const u16 tag = nvmeq->counter++ & (nvmeq->q_depth - 1);
+ enum dma_data_direction dma_dir;
+ int ret;
+
+ switch (qid) {
+ case NVME_QID_ADMIN:
+ switch (cmd->common.opcode) {
+ case nvme_admin_create_sq:
+ case nvme_admin_create_cq:
+ case nvme_admin_delete_sq:
+ case nvme_admin_delete_cq:
+ case nvme_admin_set_features:
+ dma_dir = DMA_TO_DEVICE;
+ break;
+ case nvme_admin_identify:
+ dma_dir = DMA_FROM_DEVICE;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case NVME_QID_IO:
+ switch (cmd->rw.opcode) {
+ case nvme_cmd_write:
+ dma_dir = DMA_TO_DEVICE;
+ break;
+ case nvme_cmd_read:
+ dma_dir = DMA_FROM_DEVICE;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ cmd->common.command_id = tag;
+
+ timeout = timeout ?: ADMIN_TIMEOUT;
+
+ req.cmd = cmd;
+ req.buffer = buffer;
+ req.buffer_len = buffer_len;
+ req.dma_dir = dma_dir;
+
+ ret = nvme_map_data(dev, &req);
+ if (ret) {
+ dev_err(dev->dev, "Failed to map request data\n");
+ return ret;
+ }
+
+ nvme_submit_cmd(nvmeq, cmd);
+
+ nvmeq->req = &req;
+ ret = wait_on_timeout(timeout, nvme_poll(nvmeq, tag));
+ nvmeq->req = NULL;
+
+ nvme_unmap_data(dev, &req);
+
+ if (result)
+ *result = req.result;
+
+ return ret ?: req.status;
+}
+
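+/*
+ * Disable the controller, program the admin queue attributes and base
+ * addresses (AQA/ASQ/ACQ) and enable it again.
+ */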
+static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
+{
+ int result;
+ u32 aqa;
+ struct nvme_queue *nvmeq;
+
+ dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
+ NVME_CAP_NSSRC(dev->ctrl.cap) : 0;
+
+ if (dev->subsystem &&
+ (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
+ writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
+
+ result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
+ if (result < 0)
+ return result;
+
+ result = nvme_alloc_queue(dev, NVME_QID_ADMIN, NVME_AQ_DEPTH);
+ if (result)
+ return result;
+
+ nvmeq = &dev->queues[NVME_QID_ADMIN];
+ aqa = nvmeq->q_depth - 1;
+ aqa |= aqa << 16;
+
+ writel(aqa, dev->bar + NVME_REG_AQA);
+ writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
+ writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
+
+ result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap);
+ if (result)
+ return result;
+
+ nvme_init_queue(nvmeq, NVME_QID_ADMIN);
+
+ return result;
+}
+
+static int nvme_create_io_queues(struct nvme_dev *dev)
+{
+ unsigned i, max;
+ int ret = 0;
+
+ for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
+ if (nvme_alloc_queue(dev, i, dev->q_depth)) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+
+ max = min(dev->max_qid, dev->ctrl.queue_count - 1);
+ for (i = dev->online_queues; i <= max; i++) {
+ ret = nvme_create_queue(&dev->queues[i], i);
+ if (ret)
+ break;
+ }
+
+ /*
+ * Ignore failing Create SQ/CQ commands, we can continue with less
+ * than the desired amount of queues, and even a controller without
+ * I/O queues can still be used to issue admin commands. This might
+ * be useful to upgrade a buggy firmware for example.
+ */
+ return ret >= 0 ? 0 : ret;
+}
+
+static int nvme_setup_io_queues(struct nvme_dev *dev)
+{
+ int result, nr_io_queues;
+
+ nr_io_queues = NVME_QID_NUM - 1;
+ result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
+ if (result < 0)
+ return result;
+
+ dev->max_qid = nr_io_queues;
+
+ return nvme_create_io_queues(dev);
+}
+
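+/*
+ * Enable the PCI device and derive queue depth and doorbell stride from
+ * the CAP register. The doorbell registers start at offset 4096 of BAR 0.
+ */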
+static int nvme_pci_enable(struct nvme_dev *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (pci_enable_device(pdev))
+ return -ENOMEM;
+
+ pci_set_master(pdev);
+
+ if (readl(dev->bar + NVME_REG_CSTS) == -1)
+ return -ENODEV;
+
+ dev->ctrl.cap = readq(dev->bar + NVME_REG_CAP);
+
+ dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
+ io_queue_depth);
+ dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
+ dev->dbs = dev->bar + 4096;
+
+ return 0;
+}
+
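+/*
+ * Controller bring-up: enable the PCI device, set up the admin queue,
+ * identify the controller and create the I/O queues.
+ */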
+static void nvme_reset_work(struct nvme_dev *dev)
+{
+ int result = -ENODEV;
+
+ result = nvme_pci_enable(dev);
+ if (result)
+ goto out;
+
+ result = nvme_pci_configure_admin_queue(dev);
+ if (result)
+ goto out;
+
+ /*
+ * Limit the max command size to prevent iod->sg allocations going
+ * over a single page.
+ */
+ dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
+
+ result = nvme_init_identify(&dev->ctrl);
+ if (result)
+ goto out;
+
+ result = nvme_setup_io_queues(dev);
+ if (result) {
+ dev_err(dev->ctrl.dev, "IO queues not created\n");
+ goto out;
+ }
+
+ nvme_start_ctrl(&dev->ctrl);
+out:
+ return;
+}
+
+static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
+{
+ *val = readl(to_nvme_dev(ctrl)->bar + off);
+ return 0;
+}
+
+static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
+{
+ writel(val, to_nvme_dev(ctrl)->bar + off);
+ return 0;
+}
+
+static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
+{
+ *val = readq(to_nvme_dev(ctrl)->bar + off);
+ return 0;
+}
+
+static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
+ .reg_read32 = nvme_pci_reg_read32,
+ .reg_write32 = nvme_pci_reg_write32,
+ .reg_read64 = nvme_pci_reg_read64,
+ .submit_sync_cmd = nvme_pci_submit_sync_cmd,
+};
+
+static void nvme_dev_map(struct nvme_dev *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ dev->bar = pci_iomap(pdev, 0);
+}
+
+static void nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
+{
+ int ret;
+ ret = adapter_delete_queue(nvmeq->dev, opcode, nvmeq->qid);
+ if (ret < 0)
+ dev_err(nvmeq->dev->dev, "%s: %s\n", __func__,
+ strerror(-ret));
+ else if (ret)
+ dev_err(nvmeq->dev->dev,
+ "%s: status code type: %xh, status code %02xh\n",
+ __func__, (ret >> 8) & 0xf, ret & 0xff);
+}
+
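+/*
+ * Delete the I/O queues in reverse order; a submission queue has to be
+ * deleted before the completion queue it posts to.
+ */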
+static void nvme_disable_io_queues(struct nvme_dev *dev)
+{
+ int i, queues = dev->online_queues - 1;
+
+ for (i = queues; i > 0; i--) {
+ nvme_delete_queue(&dev->queues[i], nvme_admin_delete_sq);
+ nvme_delete_queue(&dev->queues[i], nvme_admin_delete_cq);
+ }
+}
+
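+/*
+ * Shut the controller down cleanly and reap any completions still left
+ * in the admin completion queue.
+ */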
+static void nvme_disable_admin_queue(struct nvme_dev *dev)
+{
+ struct nvme_queue *nvmeq = &dev->queues[0];
+ u16 start, end;
+
+ nvme_shutdown_ctrl(&dev->ctrl);
+ nvme_process_cq(nvmeq, &start, &end, -1);
+ nvme_complete_cqes(nvmeq, start, end);
+}
+
+static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct nvme_dev *dev;
+ int result;
+
+ dev = xzalloc(sizeof(*dev));
+ dev->dev = &pdev->dev;
+ pdev->dev.priv = dev;
+
+ nvme_dev_map(dev);
+ result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops);
+ if (result)
+ return result;
+
+ nvme_reset_work(dev);
+
+ return 0;
+}
+
+static void nvme_remove(struct pci_dev *pdev)
+{
+ struct nvme_dev *dev = pdev->dev.priv;
+ bool dead = true;
+
+ u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+ dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY));
+
+ if (!dead && dev->ctrl.queue_count > 0) {
+ nvme_disable_io_queues(dev);
+ nvme_disable_admin_queue(dev);
+ }
+}
+
+static const struct pci_device_id nvme_id_table[] = {
+ { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, PCI_ANY_ID) },
+ { 0, },
+};
+
+static struct pci_driver nvme_driver = {
+ .name = "nvme",
+ .id_table = nvme_id_table,
+ .probe = nvme_probe,
+ .remove = nvme_remove,
+};
+device_pci_driver(nvme_driver);