scsi: lpfc: Allow override of hardware queue selection policies
authorJames Smart <jsmart2021@gmail.com>
Mon, 28 Jan 2019 19:14:29 +0000 (11:14 -0800)
committerMartin K. Petersen <martin.petersen@oracle.com>
Wed, 6 Feb 2019 03:29:09 +0000 (22:29 -0500)
Default behavior is to use the information from the upper IO stacks to
select the hardware queue to use for IO submission, which typically has
good cpu affinity.

However, the driver, when used on some variants of the upstream kernel, has
found queuing information to be suboptimal for FCP or IO completion locked
on particular cpus.

For command submission situations, the lpfc_fcp_io_sched module parameter
can be set to specify a hardware queue selection policy that overrides the
os stack information.

For IO completion situations, rather than queueing cq processing based on
the cpu servicing the interrupting event, schedule the cq processing on the
cpu associated with the hardware queue's cq.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/lpfc/lpfc_attr.c
drivers/scsi/lpfc/lpfc_hw4.h
drivers/scsi/lpfc/lpfc_nvme.c
drivers/scsi/lpfc/lpfc_scsi.c
drivers/scsi/lpfc/lpfc_sli.c

index 47aa2af885a40d0e7617f02e9e451c5086b542f2..93a96491899c8e9fd6484d3401f131ff879ef958 100644 (file)
@@ -5275,11 +5275,12 @@ LPFC_ATTR_R(xri_rebalancing, 1, 0, 1, "Enable/Disable XRI rebalancing");
 /*
  * lpfc_io_sched: Determine scheduling algrithmn for issuing FCP cmds
  * range is [0,1]. Default value is 0.
- * For [0], FCP commands are issued to Work Queues ina round robin fashion.
+ * For [0], FCP commands are issued to Work Queues based on upper layer
+ * hardware queue index.
  * For [1], FCP commands are issued to a Work Queue associated with the
  *          current CPU.
  *
- * LPFC_FCP_SCHED_ROUND_ROBIN == 0
+ * LPFC_FCP_SCHED_BY_HDWQ == 0
  * LPFC_FCP_SCHED_BY_CPU == 1
  *
  * The driver dynamically sets this to 1 (BY_CPU) if it's able to set up cpu
@@ -5287,11 +5288,11 @@ LPFC_ATTR_R(xri_rebalancing, 1, 0, 1, "Enable/Disable XRI rebalancing");
  * CPU. Otherwise, the default 0 (Round Robin) scheduling of FCP/NVME I/Os
  * through WQs will be used.
  */
-LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_ROUND_ROBIN,
-            LPFC_FCP_SCHED_ROUND_ROBIN,
+LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_BY_HDWQ,
+            LPFC_FCP_SCHED_BY_HDWQ,
             LPFC_FCP_SCHED_BY_CPU,
             "Determine scheduling algorithm for "
-            "issuing commands [0] - Round Robin, [1] - Current CPU");
+            "issuing commands [0] - Hardware Queue, [1] - Current CPU");
 
 /*
  * lpfc_ns_query: Determine algrithmn for NameServer queries after RSCN
index c15b9b6fb8400fb05002d868e8c5b5c76ccd3c34..cd39845c909f3894fb11100a411aabfa9fe18b4f 100644 (file)
@@ -194,7 +194,7 @@ struct lpfc_sli_intf {
 #define LPFC_ACT_INTR_CNT      4
 
 /* Algrithmns for scheduling FCP commands to WQs */
-#define        LPFC_FCP_SCHED_ROUND_ROBIN      0
+#define        LPFC_FCP_SCHED_BY_HDWQ          0
 #define        LPFC_FCP_SCHED_BY_CPU           1
 
 /* Algrithmns for NameServer Query after RSCN */
index 0c6c91d39e2ff7c1507156ae22cb27a4aa8d2eef..c9aacd56a449982c43e5165db2d4dc391a330947 100644 (file)
@@ -1546,8 +1546,17 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
                }
        }
 
-       lpfc_ncmd = lpfc_get_nvme_buf(phba, ndlp,
-                                     lpfc_queue_info->index, expedite);
+       if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) {
+               idx = lpfc_queue_info->index;
+       } else {
+               cpu = smp_processor_id();
+               if (cpu < phba->cfg_hdw_queue)
+                       idx = cpu;
+               else
+                       idx = cpu % phba->cfg_hdw_queue;
+       }
+
+       lpfc_ncmd = lpfc_get_nvme_buf(phba, ndlp, idx, expedite);
        if (lpfc_ncmd == NULL) {
                atomic_inc(&lport->xmt_fcp_noxri);
                lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
@@ -1585,7 +1594,6 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
         * index to use and that they have affinitized a CPU to this hardware
         * queue. A hardware queue maps to a driver MSI-X vector/EQ/CQ/WQ.
         */
-       idx = lpfc_queue_info->index;
        lpfc_ncmd->cur_iocbq.hba_wqidx = idx;
        cstat = &phba->sli4_hba.hdwq[idx].nvme_cstat;
 
index c824ed3be4f983899f5909a727c19f2d4358ae1a..7b22cc995d7fc83ec53ca71b22176848f04d1b05 100644 (file)
@@ -688,7 +688,7 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
        int tag;
 
        cpu = smp_processor_id();
-       if (cmnd) {
+       if (cmnd && phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) {
                tag = blk_mq_unique_tag(cmnd->request);
                idx = blk_mq_unique_tag_to_hwq(tag);
        } else {
index 4443d0d430538178ff4fba2922dfa06c8206fc1e..c0f0adccdea7ee96230594263fb454d4430afc82 100644 (file)
@@ -14106,7 +14106,7 @@ process_cq:
        /* Save EQ associated with this CQ */
        cq->assoc_qp = phba->sli4_hba.hdwq[qidx].hba_eq;
 
-       if (!queue_work(phba->wq, &cq->irqwork))
+       if (!queue_work_on(cq->chann, phba->wq, &cq->irqwork))
                lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
                                "0363 Cannot schedule soft IRQ "
                                "for CQ eqcqid=%d, cqid=%d on CPU %d\n",