nvme: Complete all stuck requests
authorKeith Busch <keith.busch@intel.com>
Wed, 1 Mar 2017 19:22:12 +0000 (14:22 -0500)
committerJens Axboe <axboe@fb.com>
Thu, 2 Mar 2017 15:56:59 +0000 (08:56 -0700)
If the nvme driver is shutting down its controller, the drievr will not
start the queues up again, preventing blk-mq's hot CPU notifier from
making forward progress.

To fix that, this patch starts a request_queue freeze when the driver
resets a controller so no new requests may enter. The driver will wait
for frozen after IO queues are restarted to ensure the queue reference
can be reinitialized when nvme requests to unfreeze the queues.

If the driver is doing a safe shutdown, the driver will wait for the
controller to successfully complete all inflight requests so that we
don't unnecessarily fail them. Once the controller has been disabled,
the queues will be restarted to force remaining entered requests to end
in failure so that blk-mq's hot cpu notifier may progress.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
drivers/nvme/host/core.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c

index 25ec4e58522058f70a302ad02811abccf5cb4e1a..9b3b57fef446dc753c966c90fc2529bc9f846dd8 100644 (file)
@@ -2344,6 +2344,53 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_kill_queues);
 
+void nvme_unfreeze(struct nvme_ctrl *ctrl)
+{
+       struct nvme_ns *ns;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry(ns, &ctrl->namespaces, list)
+               blk_mq_unfreeze_queue(ns->queue);
+       mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_unfreeze);
+
+void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
+{
+       struct nvme_ns *ns;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry(ns, &ctrl->namespaces, list) {
+               timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
+               if (timeout <= 0)
+                       break;
+       }
+       mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
+
+void nvme_wait_freeze(struct nvme_ctrl *ctrl)
+{
+       struct nvme_ns *ns;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry(ns, &ctrl->namespaces, list)
+               blk_mq_freeze_queue_wait(ns->queue);
+       mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_wait_freeze);
+
+void nvme_start_freeze(struct nvme_ctrl *ctrl)
+{
+       struct nvme_ns *ns;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry(ns, &ctrl->namespaces, list)
+               blk_mq_freeze_queue_start(ns->queue);
+       mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_start_freeze);
+
 void nvme_stop_queues(struct nvme_ctrl *ctrl)
 {
        struct nvme_ns *ns;
index a3da1e90b99dbf1bb04177379c65567c992c2dfd..2aa20e3e5675bf14a8aaf8784bafc344b970a052 100644 (file)
@@ -294,6 +294,10 @@ void nvme_queue_async_events(struct nvme_ctrl *ctrl);
 void nvme_stop_queues(struct nvme_ctrl *ctrl);
 void nvme_start_queues(struct nvme_ctrl *ctrl);
 void nvme_kill_queues(struct nvme_ctrl *ctrl);
+void nvme_unfreeze(struct nvme_ctrl *ctrl);
+void nvme_wait_freeze(struct nvme_ctrl *ctrl);
+void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
+void nvme_start_freeze(struct nvme_ctrl *ctrl);
 
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
index eee8f8426ff23433f9557e318056bc03ecf3de3d..26a5fd05fe88aa003a00dc4ece6e9900bd95e618 100644 (file)
@@ -1675,21 +1675,34 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
        int i, queues;
-       u32 csts = -1;
+       bool dead = true;
+       struct pci_dev *pdev = to_pci_dev(dev->dev);
 
        del_timer_sync(&dev->watchdog_timer);
 
        mutex_lock(&dev->shutdown_lock);
-       if (pci_is_enabled(to_pci_dev(dev->dev))) {
-               nvme_stop_queues(&dev->ctrl);
-               csts = readl(dev->bar + NVME_REG_CSTS);
+       if (pci_is_enabled(pdev)) {
+               u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+               if (dev->ctrl.state == NVME_CTRL_LIVE)
+                       nvme_start_freeze(&dev->ctrl);
+               dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
+                       pdev->error_state  != pci_channel_io_normal);
        }
 
+       /*
+        * Give the controller a chance to complete all entered requests if
+        * doing a safe shutdown.
+        */
+       if (!dead && shutdown)
+               nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+       nvme_stop_queues(&dev->ctrl);
+
        queues = dev->online_queues - 1;
        for (i = dev->queue_count - 1; i > 0; i--)
                nvme_suspend_queue(dev->queues[i]);
 
-       if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
+       if (dead) {
                /* A device might become IO incapable very soon during
                 * probe, before the admin queue is configured. Thus,
                 * queue_count can be 0 here.
@@ -1704,6 +1717,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 
        blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
        blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
+
+       /*
+        * The driver will not be starting up queues again if shutting down so
+        * must flush all entered requests to their failed completion to avoid
+        * deadlocking blk-mq hot-cpu notifier.
+        */
+       if (shutdown)
+               nvme_start_queues(&dev->ctrl);
        mutex_unlock(&dev->shutdown_lock);
 }
 
@@ -1826,7 +1847,9 @@ static void nvme_reset_work(struct work_struct *work)
                nvme_remove_namespaces(&dev->ctrl);
        } else {
                nvme_start_queues(&dev->ctrl);
+               nvme_wait_freeze(&dev->ctrl);
                nvme_dev_add(dev);
+               nvme_unfreeze(&dev->ctrl);
        }
 
        if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {