static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);
+static bool use_cmb_sqes = true;
+module_param(use_cmb_sqes, bool, 0644);
+MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
+
static DEFINE_SPINLOCK(dev_list_lock);
static LIST_HEAD(dev_list);
static struct task_struct *nvme_thread;
char irqname[24]; /* nvme4294967295-65535\0 */
spinlock_t q_lock;
struct nvme_command *sq_cmds;
+ struct nvme_command __iomem *sq_cmds_io;
volatile struct nvme_completion *cqes;
struct blk_mq_tags **tags;
dma_addr_t sq_dma_addr;
{
u16 tail = nvmeq->sq_tail;
- memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+ if (nvmeq->sq_cmds_io)
+ memcpy_toio(&nvmeq->sq_cmds_io[tail], cmd, sizeof(*cmd));
+ else
+ memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+
if (++tail == nvmeq->q_depth)
tail = 0;
writel(tail, nvmeq->q_db);
{
dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
- dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+ if (nvmeq->sq_cmds)
+ dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
nvmeq->sq_cmds, nvmeq->sq_dma_addr);
kfree(nvmeq);
}
spin_unlock_irq(&nvmeq->q_lock);
}
+static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
+ int entry_size)
+{
+ int q_depth = dev->q_depth;
+ unsigned q_size_aligned = roundup(q_depth * entry_size, dev->page_size);
+
+ if (q_size_aligned * nr_io_queues > dev->cmb_size) {
+ q_depth = rounddown(dev->cmb_size / nr_io_queues,
+ dev->page_size) / entry_size;
+
+ /*
+ * Ensure the reduced q_depth is above some threshold where it
+ * would be better to map queues in system memory with the
+ * original depth
+ */
+ if (q_depth < 64)
+ return -ENOMEM;
+ }
+
+ return q_depth;
+}
+
+static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
+ int qid, int depth)
+{
+ if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
+ unsigned offset = (qid - 1) *
+ roundup(SQ_SIZE(depth), dev->page_size);
+ nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
+ nvmeq->sq_cmds_io = dev->cmb + offset;
+ } else {
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+ &nvmeq->sq_dma_addr, GFP_KERNEL);
+ if (!nvmeq->sq_cmds)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
int depth)
{
if (!nvmeq->cqes)
goto free_nvmeq;
- nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
- &nvmeq->sq_dma_addr, GFP_KERNEL);
- if (!nvmeq->sq_cmds)
+ if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth))
goto free_cqdma;
nvmeq->q_dmadev = dev->dev;
return min(result & 0xffff, result >> 16) + 1;
}
+static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
+{
+ u64 szu, size, offset;
+ u32 cmbloc;
+ resource_size_t bar_size;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+ void __iomem *cmb;
+ dma_addr_t dma_addr;
+
+ if (!use_cmb_sqes)
+ return NULL;
+
+ dev->cmbsz = readl(&dev->bar->cmbsz);
+ if (!(NVME_CMB_SZ(dev->cmbsz)))
+ return NULL;
+
+ cmbloc = readl(&dev->bar->cmbloc);
+
+ szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
+ size = szu * NVME_CMB_SZ(dev->cmbsz);
+ offset = szu * NVME_CMB_OFST(cmbloc);
+ bar_size = pci_resource_len(pdev, NVME_CMB_BIR(cmbloc));
+
+ if (offset > bar_size)
+ return NULL;
+
+ /*
+ * Controllers may support a CMB size larger than their BAR,
+ * for example, due to being behind a bridge. Reduce the CMB to
+ * the reported size of the BAR
+ */
+ if (size > bar_size - offset)
+ size = bar_size - offset;
+
+ dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(cmbloc)) + offset;
+ cmb = ioremap_wc(dma_addr, size);
+ if (!cmb)
+ return NULL;
+
+ dev->cmb_dma_addr = dma_addr;
+ dev->cmb_size = size;
+ return cmb;
+}
+
+static inline void nvme_release_cmb(struct nvme_dev *dev)
+{
+ if (dev->cmb) {
+ iounmap(dev->cmb);
+ dev->cmb = NULL;
+ }
+}
+
static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
{
return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
if (result < nr_io_queues)
nr_io_queues = result;
+ if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) {
+ result = nvme_cmb_qdepth(dev, nr_io_queues,
+ sizeof(struct nvme_command));
+ if (result > 0)
+ dev->q_depth = result;
+ else
+ nvme_release_cmb(dev);
+ }
+
size = db_bar_size(dev, nr_io_queues);
if (size > 8192) {
iounmap(dev->bar);
dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
dev->dbs = ((void __iomem *)dev->bar) + 4096;
+ if (readl(&dev->bar->vs) >= NVME_VS(1, 2))
+ dev->cmb = nvme_map_cmb(dev);
return 0;
nvme_dev_remove_admin(dev);
device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
nvme_free_queues(dev, 0);
+ nvme_release_cmb(dev);
nvme_release_prp_pools(dev);
kref_put(&dev->kref, nvme_free_dev);
}