nvme-rdma: teardown admin/io queues once on error recovery
Author: Sagi Grimberg <sagi@grimberg.me>
Wed, 11 Oct 2017 12:29:11 +0000 (15:29 +0300)
Committer: Christoph Hellwig <hch@lst.de>
Wed, 18 Oct 2017 17:27:53 +0000 (19:27 +0200)
Relying on the queue state while tearing down on every reconnect
attempt is not a good design. We should do it once in err_work
and simply try to establish the queues for each reconnect attempt.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
drivers/nvme/host/rdma.c

index 7e61e6447d2cc45b6f76d09c57360585895f5089..95837c5317b41bb3e4f8ec8a239d67197311fae4 100644 (file)
@@ -925,10 +925,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
        ++ctrl->ctrl.nr_reconnects;
 
-       if (ctrl->ctrl.queue_count > 1)
-               nvme_rdma_destroy_io_queues(ctrl, false);
-
-       nvme_rdma_destroy_admin_queue(ctrl, false);
        ret = nvme_rdma_configure_admin_queue(ctrl, false);
        if (ret)
                goto requeue;
@@ -936,7 +932,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
        if (ctrl->ctrl.queue_count > 1) {
                ret = nvme_rdma_configure_io_queues(ctrl, false);
                if (ret)
-                       goto requeue;
+                       goto destroy_admin;
        }
 
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -946,14 +942,17 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
                return;
        }
 
-       ctrl->ctrl.nr_reconnects = 0;
-
        nvme_start_ctrl(&ctrl->ctrl);
 
-       dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
+       dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
+                       ctrl->ctrl.nr_reconnects);
+
+       ctrl->ctrl.nr_reconnects = 0;
 
        return;
 
+destroy_admin:
+       nvme_rdma_destroy_admin_queue(ctrl, false);
 requeue:
        dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
                        ctrl->ctrl.nr_reconnects);
@@ -969,17 +968,15 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
        if (ctrl->ctrl.queue_count > 1) {
                nvme_stop_queues(&ctrl->ctrl);
-               nvme_rdma_stop_io_queues(ctrl);
-       }
-       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
-       nvme_rdma_stop_queue(&ctrl->queues[0]);
-
-       /* We must take care of fastfail/requeue all our inflight requests */
-       if (ctrl->ctrl.queue_count > 1)
                blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                        nvme_cancel_request, &ctrl->ctrl);
+               nvme_rdma_destroy_io_queues(ctrl, false);
+       }
+
+       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_cancel_request, &ctrl->ctrl);
+       nvme_rdma_destroy_admin_queue(ctrl, false);
 
        /*
         * queues are not a live anymore, so restart the queues to fail fast