nvme-rdma: rework queue maps handling
authorSagi Grimberg <sagi@grimberg.me>
Sat, 19 Jan 2019 00:43:24 +0000 (16:43 -0800)
committerJens Axboe <axboe@kernel.dk>
Thu, 24 Jan 2019 00:16:59 +0000 (17:16 -0700)
If the device supports less queues than provided (if the device has less
completion vectors), we might hit a bug due to the fact that we ignore
that in nvme_rdma_map_queues (we override the maps nr_queues with user
opts).

Instead, keep track of how many default/read/poll queues we actually
allocated (rather than asked by the user) and use that to assign our
queue mappings.

Fixes: b65bb777ef22 (" nvme-rdma: support separate queue maps for read and write")
Reported-by: Saleem, Shiraz <shiraz.saleem@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/nvme/host/rdma.c

index 4101961feb44a42d864cdc769d3ccec7bf8d294b..52abc3a6de129cab702ee1ca488bf8946940657e 100644 (file)
@@ -119,6 +119,7 @@ struct nvme_rdma_ctrl {
 
        struct nvme_ctrl        ctrl;
        bool                    use_inline_data;
+       u32                     io_queues[HCTX_MAX_TYPES];
 };
 
 static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
@@ -165,8 +166,8 @@ static inline int nvme_rdma_queue_idx(struct nvme_rdma_queue *queue)
 static bool nvme_rdma_poll_queue(struct nvme_rdma_queue *queue)
 {
        return nvme_rdma_queue_idx(queue) >
-               queue->ctrl->ctrl.opts->nr_io_queues +
-               queue->ctrl->ctrl.opts->nr_write_queues;
+               queue->ctrl->io_queues[HCTX_TYPE_DEFAULT] +
+               queue->ctrl->io_queues[HCTX_TYPE_READ];
 }
 
 static inline size_t nvme_rdma_inline_data_size(struct nvme_rdma_queue *queue)
@@ -661,8 +662,21 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
        nr_io_queues = min_t(unsigned int, nr_io_queues,
                                ibdev->num_comp_vectors);
 
-       nr_io_queues += min(opts->nr_write_queues, num_online_cpus());
-       nr_io_queues += min(opts->nr_poll_queues, num_online_cpus());
+       if (opts->nr_write_queues) {
+               ctrl->io_queues[HCTX_TYPE_DEFAULT] =
+                               min(opts->nr_write_queues, nr_io_queues);
+               nr_io_queues += ctrl->io_queues[HCTX_TYPE_DEFAULT];
+       } else {
+               ctrl->io_queues[HCTX_TYPE_DEFAULT] = nr_io_queues;
+       }
+
+       ctrl->io_queues[HCTX_TYPE_READ] = nr_io_queues;
+
+       if (opts->nr_poll_queues) {
+               ctrl->io_queues[HCTX_TYPE_POLL] =
+                       min(opts->nr_poll_queues, num_online_cpus());
+               nr_io_queues += ctrl->io_queues[HCTX_TYPE_POLL];
+       }
 
        ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
        if (ret)
@@ -1789,17 +1803,15 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
        struct nvme_rdma_ctrl *ctrl = set->driver_data;
 
        set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
-       set->map[HCTX_TYPE_READ].nr_queues = ctrl->ctrl.opts->nr_io_queues;
+       set->map[HCTX_TYPE_DEFAULT].nr_queues =
+                       ctrl->io_queues[HCTX_TYPE_DEFAULT];
+       set->map[HCTX_TYPE_READ].nr_queues = ctrl->io_queues[HCTX_TYPE_READ];
        if (ctrl->ctrl.opts->nr_write_queues) {
                /* separate read/write queues */
-               set->map[HCTX_TYPE_DEFAULT].nr_queues =
-                               ctrl->ctrl.opts->nr_write_queues;
                set->map[HCTX_TYPE_READ].queue_offset =
-                               ctrl->ctrl.opts->nr_write_queues;
+                               ctrl->io_queues[HCTX_TYPE_DEFAULT];
        } else {
                /* mixed read/write queues */
-               set->map[HCTX_TYPE_DEFAULT].nr_queues =
-                               ctrl->ctrl.opts->nr_io_queues;
                set->map[HCTX_TYPE_READ].queue_offset = 0;
        }
        blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT],
@@ -1809,12 +1821,12 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
 
        if (ctrl->ctrl.opts->nr_poll_queues) {
                set->map[HCTX_TYPE_POLL].nr_queues =
-                               ctrl->ctrl.opts->nr_poll_queues;
+                               ctrl->io_queues[HCTX_TYPE_POLL];
                set->map[HCTX_TYPE_POLL].queue_offset =
-                               ctrl->ctrl.opts->nr_io_queues;
+                               ctrl->io_queues[HCTX_TYPE_DEFAULT];
                if (ctrl->ctrl.opts->nr_write_queues)
                        set->map[HCTX_TYPE_POLL].queue_offset +=
-                               ctrl->ctrl.opts->nr_write_queues;
+                               ctrl->io_queues[HCTX_TYPE_READ];
                blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
        }
        return 0;