IB/hfi1: Optimize kthread pointer locking when queuing CQ entries
author	Sebastian Sanchez <sebastian.sanchez@intel.com>
Wed, 2 May 2018 13:43:39 +0000 (06:43 -0700)
committer	Doug Ledford <dledford@redhat.com>
Wed, 9 May 2018 19:53:30 +0000 (15:53 -0400)
All threads queuing CQ entries on different CQs are unnecessarily
serialized by a spin lock used to verify that the CQ kthread worker
has not been destroyed before queuing a CQ entry.

The lock used in 6efaf10f163d ("IB/rdmavt: Avoid queuing work into a
destroyed cq kthread worker") is a device-global lock and performs
poorly at scale, as completions are entered from a large number of
CPUs.

Convert to RCU: the read side, in rvt_cq_enter(), determines that the
worker is alive prior to triggering the completion event, and the
write side, in rvt_driver_cq_init() and rvt_cq_exit(), publishes and
retracts the worker pointer with RCU semantics.
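
For reference, this follows the standard RCU pointer publish/retract
pattern. A minimal sketch, not the rdmavt code itself; identifiers
other than the kernel RCU and kthread APIs are illustrative:

	/* Reader, cf. rvt_cq_enter(): */
	rcu_read_lock();
	worker = rcu_dereference(worker_p);
	if (worker)
		kthread_queue_work(worker, &work);
	rcu_read_unlock();

	/* Writer teardown, cf. rvt_cq_exit(): */
	spin_lock(&lock);
	worker = rcu_dereference_protected(worker_p,
					   lockdep_is_held(&lock));
	RCU_INIT_POINTER(worker_p, NULL);
	spin_unlock(&lock);
	synchronize_rcu();	/* wait out readers that saw the old pointer */
	kthread_destroy_worker(worker);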

Fixes: 6efaf10f163d ("IB/rdmavt: Avoid queuing work into a destroyed cq kthread worker")
Cc: <stable@vger.kernel.org> # 4.14.x
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/sw/rdmavt/cq.c
include/rdma/rdma_vt.h

index fb52b669bfce5ce4e9ee512902270197fb319632..340c17aba3b0771ce2a7fb467cb7f6f8588e900a 100644
@@ -120,17 +120,20 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
        if (cq->notify == IB_CQ_NEXT_COMP ||
            (cq->notify == IB_CQ_SOLICITED &&
             (solicited || entry->status != IB_WC_SUCCESS))) {
+               struct kthread_worker *worker;
+
                /*
                 * This will cause send_complete() to be called in
                 * another thread.
                 */
-               spin_lock(&cq->rdi->n_cqs_lock);
-               if (likely(cq->rdi->worker)) {
+               rcu_read_lock();
+               worker = rcu_dereference(cq->rdi->worker);
+               if (likely(worker)) {
                        cq->notify = RVT_CQ_NONE;
                        cq->triggered++;
-                       kthread_queue_work(cq->rdi->worker, &cq->comptask);
+                       kthread_queue_work(worker, &cq->comptask);
                }
-               spin_unlock(&cq->rdi->n_cqs_lock);
+               rcu_read_unlock();
        }
 
        spin_unlock_irqrestore(&cq->lock, flags);
@@ -512,7 +515,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi)
        int cpu;
        struct kthread_worker *worker;
 
-       if (rdi->worker)
+       if (rcu_access_pointer(rdi->worker))
                return 0;
 
        spin_lock_init(&rdi->n_cqs_lock);
@@ -524,7 +527,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi)
                return PTR_ERR(worker);
 
        set_user_nice(worker->task, MIN_NICE);
-       rdi->worker = worker;
+       RCU_INIT_POINTER(rdi->worker, worker);
        return 0;
 }
 
@@ -536,15 +539,19 @@ void rvt_cq_exit(struct rvt_dev_info *rdi)
 {
        struct kthread_worker *worker;
 
-       /* block future queuing from send_complete() */
-       spin_lock_irq(&rdi->n_cqs_lock);
-       worker = rdi->worker;
+       if (!rcu_access_pointer(rdi->worker))
+               return;
+
+       spin_lock(&rdi->n_cqs_lock);
+       worker = rcu_dereference_protected(rdi->worker,
+                                          lockdep_is_held(&rdi->n_cqs_lock));
        if (!worker) {
-               spin_unlock_irq(&rdi->n_cqs_lock);
+               spin_unlock(&rdi->n_cqs_lock);
                return;
        }
-       rdi->worker = NULL;
-       spin_unlock_irq(&rdi->n_cqs_lock);
+       RCU_INIT_POINTER(rdi->worker, NULL);
+       spin_unlock(&rdi->n_cqs_lock);
+       synchronize_rcu();
 
        kthread_destroy_worker(worker);
 }
index 3f4c187e435d6f52b7390e813e16e48eb0fccc8b..eec495e68823b066d457bd1822549123eb4e818d 100644
@@ -402,7 +402,7 @@ struct rvt_dev_info {
        spinlock_t pending_lock; /* protect pending mmap list */
 
        /* CQ */
-       struct kthread_worker *worker; /* per device cq worker */
+       struct kthread_worker __rcu *worker; /* per device cq worker */
        u32 n_cqs_allocated;    /* number of CQs allocated for device */
        spinlock_t n_cqs_lock; /* protect count of in use cqs */