nvme-rdma: use intelligent affinity based queue mappings

author Sagi Grimberg <sagi@grimberg.me>

Thu, 13 Jul 2017 08:09:44 +0000 (11:09 +0300)

committer Doug Ledford <dledford@redhat.com>

Tue, 8 Aug 2017 18:58:03 +0000 (14:58 -0400)
author Sagi Grimberg <sagi@grimberg.me>
Thu, 13 Jul 2017 08:09:44 +0000 (11:09 +0300)
committer Doug Ledford <dledford@redhat.com>
Tue, 8 Aug 2017 18:58:03 +0000 (14:58 -0400)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c

index da04df1af231758cb4965735c417a215d736ec94..4e25acc54e093b1b6ba294de79d5fa0be9f25541 100644 (file)
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -19,6 +19,7 @@
  #include <linux/string.h>
  #include <linux/atomic.h>
  #include <linux/blk-mq.h>
+#include <linux/blk-mq-rdma.h>
  #include <linux/types.h>
  #include <linux/list.h>
  #include <linux/mutex.h>
@@ -463,14 +464,10 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
         ibdev = queue->device->dev;
  
         /*
-        * The admin queue is barely used once the controller is live, so don't
-        * bother to spread it out.
+        * Spread I/O queues completion vectors according to their queue index.
+        * Admin queues can always go on completion vector 0.
          */
-       if (idx == 0)
-               comp_vector = 0;
-       else
-               comp_vector = idx % ibdev->num_comp_vectors;
-
+       comp_vector = idx == 0 ? idx : idx - 1;
  
         /* +1 for ib_stop_cq */
         queue->ib_cq = ib_alloc_cq(ibdev, queue,
@@ -611,10 +608,20 @@ out_free_queues:
  static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
  {
         struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+       struct ib_device *ibdev = ctrl->device->dev;
         unsigned int nr_io_queues;
         int i, ret;
  
         nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
+
+       /*
+        * We map queues according to the device IRQ vectors for
+        * optimal locality, so we don't need more queues than
+        * completion vectors.
+        */
+       nr_io_queues = min_t(unsigned int, nr_io_queues,
+                               ibdev->num_comp_vectors);
+
         ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
         if (ret)
                 return ret;
@@ -1498,6 +1505,13 @@ static void nvme_rdma_complete_rq(struct request *rq)
         nvme_complete_rq(rq);
  }
  
+static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
+{
+       struct nvme_rdma_ctrl *ctrl = set->driver_data;
+
+       return blk_mq_rdma_map_queues(set, ctrl->device->dev, 0);
+}
+
  static const struct blk_mq_ops nvme_rdma_mq_ops = {
         .queue_rq       = nvme_rdma_queue_rq,
         .complete       = nvme_rdma_complete_rq,
@@ -1507,6 +1521,7 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = {
         .init_hctx      = nvme_rdma_init_hctx,
         .poll           = nvme_rdma_poll,
         .timeout        = nvme_rdma_timeout,
+       .map_queues     = nvme_rdma_map_queues,
  };
  
  static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
author	Sagi Grimberg <sagi@grimberg.me>
	Thu, 13 Jul 2017 08:09:44 +0000 (11:09 +0300)
committer	Doug Ledford <dledford@redhat.com>
	Tue, 8 Aug 2017 18:58:03 +0000 (14:58 -0400)