nvme-fc: change controllers first connect to use reconnect path

author James Smart <jsmart2021@gmail.com>

Wed, 13 Jun 2018 21:07:37 +0000 (14:07 -0700)

committer Christoph Hellwig <hch@lst.de>

Thu, 14 Jun 2018 12:25:09 +0000 (14:25 +0200)
author James Smart <jsmart2021@gmail.com>
Wed, 13 Jun 2018 21:07:37 +0000 (14:07 -0700)
committer Christoph Hellwig <hch@lst.de>
Thu, 14 Jun 2018 12:25:09 +0000 (14:25 +0200)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c

index 0bad65803271ff68bc883e0dd16c78b8386fabf8..9d826b726425d11231db947bcb2d9387fd07caa5 100644 (file)
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -142,6 +142,7 @@ struct nvme_fc_ctrl {
         struct nvme_fc_rport    *rport;
         u32                     cnum;
  
+       bool                    ioq_live;
         bool                    assoc_active;
         u64                     association_id;
  
@@ -2463,6 +2464,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
         if (ret)
                 goto out_delete_hw_queues;
  
+       ctrl->ioq_live = true;
+
         return 0;
  
  out_delete_hw_queues:
@@ -2615,8 +2618,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
         if (ret)
                 goto out_delete_hw_queue;
  
-       if (ctrl->ctrl.state != NVME_CTRL_NEW)
-               blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+       blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
  
         ret = nvmf_connect_admin_queue(&ctrl->ctrl);
         if (ret)
@@ -2689,7 +2691,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
          */
  
         if (ctrl->ctrl.queue_count > 1) {
-               if (ctrl->ctrl.state == NVME_CTRL_NEW)
+               if (!ctrl->ioq_live)
                         ret = nvme_fc_create_io_queues(ctrl);
                 else
                         ret = nvme_fc_reinit_io_queues(ctrl);
@@ -2776,8 +2778,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
          * use blk_mq_tagset_busy_itr() and the transport routine to
          * terminate the exchanges.
          */
-       if (ctrl->ctrl.state != NVME_CTRL_NEW)
-               blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+       blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
         blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                 nvme_fc_terminate_exchange, &ctrl->ctrl);
  
@@ -2934,7 +2935,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
                 nvme_fc_reconnect_or_delete(ctrl, ret);
         else
                 dev_info(ctrl->ctrl.device,
-                       "NVME-FC{%d}: controller reconnect complete\n",
+                       "NVME-FC{%d}: controller connect complete\n",
                         ctrl->cnum);
  }
  
@@ -2982,7 +2983,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
  {
         struct nvme_fc_ctrl *ctrl;
         unsigned long flags;
-       int ret, idx, retry;
+       int ret, idx;
  
         if (!(rport->remoteport.port_role &
             (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
@@ -3009,11 +3010,13 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
         }
  
         ctrl->ctrl.opts = opts;
+       ctrl->ctrl.nr_reconnects = 0;
         INIT_LIST_HEAD(&ctrl->ctrl_list);
         ctrl->lport = lport;
         ctrl->rport = rport;
         ctrl->dev = lport->dev;
         ctrl->cnum = idx;
+       ctrl->ioq_live = false;
         ctrl->assoc_active = false;
         init_waitqueue_head(&ctrl->ioabort_wait);
  
@@ -3032,6 +3035,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
  
         ctrl->ctrl.sqsize = opts->queue_size - 1;
         ctrl->ctrl.kato = opts->kato;
+       ctrl->ctrl.cntlid = 0xffff;
  
         ret = -ENOMEM;
         ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
@@ -3081,62 +3085,24 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
         list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
         spin_unlock_irqrestore(&rport->lock, flags);
  
-       /*
-        * It's possible that transactions used to create the association
-        * may fail. Examples: CreateAssociation LS or CreateIOConnection
-        * LS gets dropped/corrupted/fails; or a frame gets dropped or a
-        * command times out for one of the actions to init the controller
-        * (Connect, Get/Set_Property, Set_Features, etc). Many of these
-        * transport errors (frame drop, LS failure) inherently must kill
-        * the association. The transport is coded so that any command used
-        * to create the association (prior to a LIVE state transition
-        * while NEW or CONNECTING) will fail if it completes in error or
-        * times out.
-        *
-        * As such: as the connect request was mostly likely due to a
-        * udev event that discovered the remote port, meaning there is
-        * not an admin or script there to restart if the connect
-        * request fails, retry the initial connection creation up to
-        * three times before giving up and declaring failure.
-        */
-       for (retry = 0; retry < 3; retry++) {
-               ret = nvme_fc_create_association(ctrl);
-               if (!ret)
-                       break;
-       }
-
-       if (ret) {
-               nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
-               cancel_work_sync(&ctrl->ctrl.reset_work);
-               cancel_delayed_work_sync(&ctrl->connect_work);
-
-               /* couldn't schedule retry - fail out */
+       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) ||
+           !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
                 dev_err(ctrl->ctrl.device,
-                       "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);
-
-               ctrl->ctrl.opts = NULL;
+                       "NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum);
+               goto fail_ctrl;
+       }
  
-               /* initiate nvme ctrl ref counting teardown */
-               nvme_uninit_ctrl(&ctrl->ctrl);
+       nvme_get_ctrl(&ctrl->ctrl);
  
-               /* Remove core ctrl ref. */
+       if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
                 nvme_put_ctrl(&ctrl->ctrl);
-
-               /* as we're past the point where we transition to the ref
-                * counting teardown path, if we return a bad pointer here,
-                * the calling routine, thinking it's prior to the
-                * transition, will do an rport put. Since the teardown
-                * path also does a rport put, we do an extra get here to
-                * so proper order/teardown happens.
-                */
-               nvme_fc_rport_get(rport);
-
-               if (ret > 0)
-                       ret = -EIO;
-               return ERR_PTR(ret);
+               dev_err(ctrl->ctrl.device,
+                       "NVME-FC{%d}: failed to schedule initial connect\n",
+                       ctrl->cnum);
+               goto fail_ctrl;
         }
  
-       nvme_get_ctrl(&ctrl->ctrl);
+       flush_delayed_work(&ctrl->connect_work);
  
         dev_info(ctrl->ctrl.device,
                 "NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
@@ -3144,6 +3110,30 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
  
         return &ctrl->ctrl;
  
+fail_ctrl:
+       nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
+       cancel_work_sync(&ctrl->ctrl.reset_work);
+       cancel_delayed_work_sync(&ctrl->connect_work);
+
+       ctrl->ctrl.opts = NULL;
+
+       /* initiate nvme ctrl ref counting teardown */
+       nvme_uninit_ctrl(&ctrl->ctrl);
+
+       /* Remove core ctrl ref. */
+       nvme_put_ctrl(&ctrl->ctrl);
+
+       /* as we're past the point where we transition to the ref
+        * counting teardown path, if we return a bad pointer here,
+        * the calling routine, thinking it's prior to the
+        * transition, will do an rport put. Since the teardown
+        * path also does an rport put, we do an extra get here
+        * so that proper order/teardown happens.
+        */
+       nvme_fc_rport_get(rport);
+
+       return ERR_PTR(-EIO);
+
  out_cleanup_admin_q:
         blk_cleanup_queue(ctrl->ctrl.admin_q);
  out_free_admin_tag_set:
author	James Smart <jsmart2021@gmail.com>
	Wed, 13 Jun 2018 21:07:37 +0000 (14:07 -0700)
committer	Christoph Hellwig <hch@lst.de>
	Thu, 14 Jun 2018 12:25:09 +0000 (14:25 +0200)