IB/{hfi1, rdmavt, qib}: Implement CQ completion vector support

author Sebastian Sanchez <sebastian.sanchez@intel.com>

Wed, 2 May 2018 13:43:55 +0000 (06:43 -0700)

committer Doug Ledford <dledford@redhat.com>

Wed, 9 May 2018 19:53:30 +0000 (15:53 -0400)
author Sebastian Sanchez <sebastian.sanchez@intel.com>
Wed, 2 May 2018 13:43:55 +0000 (06:43 -0700)
committer Doug Ledford <dledford@redhat.com>
Wed, 9 May 2018 19:53:30 +0000 (15:53 -0400)
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c

index eca9e635401767f947a69fdc1aa1c39af2c0e67c..fbe7198a715a1278f0b2c7068d8a740177f50253 100644 (file)
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1,5 +1,5 @@
  /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
   *
   * This file is provided under a dual BSD/GPLv2 license.  When using or
   * redistributing this file, you may do so under either license.
@@ -208,7 +208,13 @@ int node_affinity_init(void)
         return 0;
  }
  
-void node_affinity_destroy(void)
+/*
+ * Release a single affinity node: free its per-CPU completion vector
+ * counters (comp_vect_affinity) and then the node structure itself.
+ * This function does not unlink the entry from any list; callers do that
+ * (or have not linked it yet) before calling.
+ */
+static void node_affinity_destroy(struct hfi1_affinity_node *entry)
+{
+       free_percpu(entry->comp_vect_affinity);
+       kfree(entry);
+}
+
+void node_affinity_destroy_all(void)
  {
         struct list_head *pos, *q;
         struct hfi1_affinity_node *entry;
@@ -218,7 +224,7 @@ void node_affinity_destroy(void)
                 entry = list_entry(pos, struct hfi1_affinity_node,
                                    list);
                 list_del(pos);
-               kfree(entry);
+               node_affinity_destroy(entry);
         }
         mutex_unlock(&node_affinity.lock);
         kfree(hfi1_per_node_cntr);
@@ -232,6 +238,7 @@ static struct hfi1_affinity_node *node_affinity_allocate(int node)
         if (!entry)
                 return NULL;
         entry->node = node;
+       entry->comp_vect_affinity = alloc_percpu(u16);
         INIT_LIST_HEAD(&entry->list);
  
         return entry;
@@ -261,6 +268,341 @@ static struct hfi1_affinity_node *node_affinity_lookup(int node)
         return NULL;
  }
  
+/*
+ * Pick the CPU in possible_cpumask whose counter in comp_vect_affinity is
+ * the smallest and increment that counter.  Only a strictly smaller counter
+ * replaces the current candidate, so on a tie the first CPU visited wins.
+ *
+ * Returns the chosen CPU, or -EINVAL if either argument is NULL or the mask
+ * contains no CPU.
+ */
+static int per_cpu_affinity_get(cpumask_var_t possible_cpumask,
+                               u16 __percpu *comp_vect_affinity)
+{
+       int curr_cpu;
+       u16 cntr;
+       u16 prev_cntr;
+       int ret_cpu;
+
+       if (!possible_cpumask) {
+               ret_cpu = -EINVAL;
+               goto fail;
+       }
+
+       if (!comp_vect_affinity) {
+               ret_cpu = -EINVAL;
+               goto fail;
+       }
+
+       /* Start from the first CPU in the mask; an empty mask is an error */
+       ret_cpu = cpumask_first(possible_cpumask);
+       if (ret_cpu >= nr_cpu_ids) {
+               ret_cpu = -EINVAL;
+               goto fail;
+       }
+
+       /* Scan for the CPU with the lowest counter */
+       prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu);
+       for_each_cpu(curr_cpu, possible_cpumask) {
+               cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
+
+               if (cntr < prev_cntr) {
+                       ret_cpu = curr_cpu;
+                       prev_cntr = cntr;
+               }
+       }
+
+       /* Account for the new user of the selected CPU */
+       *per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1;
+
+fail:
+       return ret_cpu;
+}
+
+/*
+ * Counterpart of per_cpu_affinity_get(): pick the CPU in possible_cpumask
+ * whose counter in comp_vect_affinity is the largest and decrement that
+ * counter.  Only a strictly larger counter replaces the current candidate,
+ * so on a tie the first CPU visited wins.
+ *
+ * Returns the chosen CPU, or -EINVAL if either argument is NULL or the mask
+ * contains no CPU.
+ *
+ * NOTE(review): the decrement is unconditional; if every counter in the
+ * mask were already 0 the u16 would wrap.  Confirm that callers keep
+ * get/put calls balanced.
+ */
+static int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask,
+                                   u16 __percpu *comp_vect_affinity)
+{
+       int curr_cpu;
+       int max_cpu;
+       u16 cntr;
+       u16 prev_cntr;
+
+       if (!possible_cpumask)
+               return -EINVAL;
+
+       if (!comp_vect_affinity)
+               return -EINVAL;
+
+       /* Start from the first CPU in the mask; an empty mask is an error */
+       max_cpu = cpumask_first(possible_cpumask);
+       if (max_cpu >= nr_cpu_ids)
+               return -EINVAL;
+
+       /* Scan for the CPU with the highest counter */
+       prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu);
+       for_each_cpu(curr_cpu, possible_cpumask) {
+               cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
+
+               if (cntr > prev_cntr) {
+                       max_cpu = curr_cpu;
+                       prev_cntr = cntr;
+               }
+       }
+
+       *per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1;
+
+       return max_cpu;
+}
+
+/*
+ * Claim one CPU from the device's completion vector CPU set.
+ *
+ * Non-interrupt CPUs are used first, then interrupt CPUs.
+ * Two already allocated cpu masks must be passed; they are used as scratch
+ * space and overwritten on every call.
+ *
+ * The chosen CPU is marked in dd->comp_vect->used.  Returns the CPU number,
+ * or -1 if a scratch mask is missing or no CPU is available.
+ */
+static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd,
+                                 struct hfi1_affinity_node *entry,
+                                 cpumask_var_t non_intr_cpus,
+                                 cpumask_var_t available_cpus)
+       __must_hold(&node_affinity.lock)
+{
+       int cpu;
+       struct cpu_mask_set *set = dd->comp_vect;
+
+       lockdep_assert_held(&node_affinity.lock);
+       if (!non_intr_cpus) {
+               cpu = -1;
+               goto fail;
+       }
+
+       if (!available_cpus) {
+               cpu = -1;
+               goto fail;
+       }
+
+       /*
+        * Available CPUs for pinning completion vectors: those in the
+        * device's mask that are not yet marked used.
+        * NOTE(review): _cpu_mask_set_gen_inc() is defined outside this
+        * hunk; presumably it recycles set->used once every CPU in
+        * set->mask has been taken - confirm.
+        */
+       _cpu_mask_set_gen_inc(set);
+       cpumask_andnot(available_cpus, &set->mask, &set->used);
+
+       /* Available CPUs without SDMA engine interrupts */
+       cpumask_andnot(non_intr_cpus, available_cpus,
+                      &entry->def_intr.used);
+
+       /* If there are non-interrupt CPUs available, use them first */
+       if (!cpumask_empty(non_intr_cpus))
+               cpu = cpumask_first(non_intr_cpus);
+       else /* Otherwise, use interrupt CPUs */
+               cpu = cpumask_first(available_cpus);
+
+       if (cpu >= nr_cpu_ids) { /* empty */
+               cpu = -1;
+               goto fail;
+       }
+       cpumask_set_cpu(cpu, &set->used);
+
+fail:
+       return cpu;
+}
+
+/*
+ * Hand a CPU obtained from _dev_comp_vect_cpu_get() back to the device's
+ * completion vector CPU set via cpu_mask_set_put().  A negative cpu (the
+ * "unassigned" marker used in dd->comp_vect_mappings) is ignored.
+ */
+static void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu)
+{
+       struct cpu_mask_set *set = dd->comp_vect;
+
+       if (cpu < 0)
+               return;
+
+       cpu_mask_set_put(set, cpu);
+}
+
+/*
+ * Tear down the completion vector -> CPU table of a device: every mapped
+ * CPU is released, its slot is reset to -1, and the table is freed.
+ *
+ * _dev_comp_vect_mappings_destroy() may be called repeatedly, or on a device
+ * whose table was never allocated: a NULL dd->comp_vect_mappings makes it
+ * return immediately, and the pointer is reset to NULL after freeing.
+ */
+static void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd)
+{
+       int i, cpu;
+
+       if (!dd->comp_vect_mappings)
+               return;
+
+       for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+               cpu = dd->comp_vect_mappings[i];
+               _dev_comp_vect_cpu_put(dd, cpu);
+               dd->comp_vect_mappings[i] = -1;
+               hfi1_cdbg(AFFINITY,
+                         "[%s] Release CPU %d from completion vector %d",
+                         rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i);
+       }
+
+       kfree(dd->comp_vect_mappings);
+       dd->comp_vect_mappings = NULL;
+}
+
+/*
+ * Create the table used to look up the CPU for each completion vector.
+ *
+ * dd->comp_vect_possible_cpus (the number of completion vectors for this
+ * device) must have been initialized before calling this function.  One CPU
+ * is claimed per vector through _dev_comp_vect_cpu_get() and recorded in
+ * dd->comp_vect_mappings[]; entries start out as -1 so that a partially
+ * filled table can be torn down safely.
+ *
+ * The two scratch cpumasks are only needed while the table is being built,
+ * so they are released on every exit path, including success.
+ *
+ * Returns 0 on success, -ENOMEM if an allocation fails, or -EINVAL if no CPU
+ * could be obtained for a vector.  On failure the partially built table is
+ * destroyed and dd->comp_vect_mappings is left NULL.
+ */
+static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
+                                         struct hfi1_affinity_node *entry)
+       __must_hold(&node_affinity.lock)
+{
+       int i, cpu, ret;
+       cpumask_var_t non_intr_cpus;
+       cpumask_var_t available_cpus;
+
+       lockdep_assert_held(&node_affinity.lock);
+
+       if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL))
+               return -ENOMEM;
+
+       if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) {
+               free_cpumask_var(non_intr_cpus);
+               return -ENOMEM;
+       }
+
+       dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus,
+                                        sizeof(*dd->comp_vect_mappings),
+                                        GFP_KERNEL);
+       if (!dd->comp_vect_mappings) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+       /* Mark every slot unassigned so a partial table can be destroyed */
+       for (i = 0; i < dd->comp_vect_possible_cpus; i++)
+               dd->comp_vect_mappings[i] = -1;
+
+       for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+               cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus,
+                                            available_cpus);
+               if (cpu < 0) {
+                       ret = -EINVAL;
+                       goto fail;
+               }
+
+               dd->comp_vect_mappings[i] = cpu;
+               hfi1_cdbg(AFFINITY,
+                         "[%s] Completion Vector %d -> CPU %d",
+                         rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
+       }
+
+       /* The scratch masks are not needed once the table has been built */
+       free_cpumask_var(available_cpus);
+       free_cpumask_var(non_intr_cpus);
+
+       return 0;
+
+fail:
+       free_cpumask_var(available_cpus);
+       free_cpumask_var(non_intr_cpus);
+       _dev_comp_vect_mappings_destroy(dd);
+
+       return ret;
+}
+
+/*
+ * Build the completion vector -> CPU table for a device.
+ *
+ * Takes node_affinity.lock, looks up the affinity node for dd->node and
+ * creates the mappings for it.  Returns 0 on success, -EINVAL if no
+ * affinity node exists for dd->node, or the error returned by
+ * _dev_comp_vect_mappings_create().
+ */
+int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd)
+{
+       int ret;
+       struct hfi1_affinity_node *entry;
+
+       mutex_lock(&node_affinity.lock);
+       entry = node_affinity_lookup(dd->node);
+       if (!entry) {
+               ret = -EINVAL;
+               goto unlock;
+       }
+       ret = _dev_comp_vect_mappings_create(dd, entry);
+unlock:
+       mutex_unlock(&node_affinity.lock);
+
+       return ret;
+}
+
+/*
+ * Release the completion vector -> CPU table of a device.  Thin wrapper
+ * around _dev_comp_vect_mappings_destroy(), which returns immediately when
+ * the table was never created or has already been destroyed.
+ */
+void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd)
+{
+       _dev_comp_vect_mappings_destroy(dd);
+}
+
+/*
+ * Look up the CPU assigned to completion vector comp_vect of the device
+ * behind rdi.  Installed as the rdmavt comp_vect_cpu_lookup driver callback.
+ *
+ * Returns the table entry for comp_vect, or -EINVAL if the device has no
+ * mapping table or comp_vect is out of range.
+ */
+int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect)
+{
+       struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+       struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+
+       if (!dd->comp_vect_mappings)
+               return -EINVAL;
+       /*
+        * comp_vect_possible_cpus is a u32, so comp_vect is converted to
+        * unsigned for this comparison and a negative value is rejected too.
+        */
+       if (comp_vect >= dd->comp_vect_possible_cpus)
+               return -EINVAL;
+
+       return dd->comp_vect_mappings[comp_vect];
+}
+
+/*
+ * Compute how many completion vector CPUs this device gets, store that
+ * count in dd->comp_vect_possible_cpus, and reserve that many CPUs from the
+ * node's comp_vect_mask into the device's completion vector mask
+ * (dd->comp_vect->mask).
+ *
+ * first_dev_init is true when this is the first device initialized on the
+ * node; that device receives the extra CPU when the CPUs do not divide
+ * evenly among the node's devices.
+ *
+ * Returns 0 on success or the negative error from per_cpu_affinity_get().
+ */
+static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd,
+                                       struct hfi1_affinity_node *entry,
+                                       bool first_dev_init)
+       __must_hold(&node_affinity.lock)
+{
+       int i, j, curr_cpu;
+       int possible_cpus_comp_vect = 0;
+       struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask;
+
+       lockdep_assert_held(&node_affinity.lock);
+       /*
+        * If there's only one CPU available for completion vectors, then
+        * there will only be one completion vector available. Otherwise,
+        * the number of completion vectors available will be the number of
+        * available CPUs divided by the number of devices in the
+        * local NUMA node.
+        */
+       if (cpumask_weight(&entry->comp_vect_mask) == 1) {
+               possible_cpus_comp_vect = 1;
+               dd_dev_warn(dd,
+                           "Number of kernel receive queues is too large for completion vector affinity to be effective\n");
+       } else {
+               possible_cpus_comp_vect +=
+                       cpumask_weight(&entry->comp_vect_mask) /
+                                      hfi1_per_node_cntr[dd->node];
+
+               /*
+                * If the completion vector CPUs available don't divide
+                * evenly among devices, then the first device to be
+                * initialized gets an extra CPU.
+                */
+               if (first_dev_init &&
+                   cpumask_weight(&entry->comp_vect_mask) %
+                   hfi1_per_node_cntr[dd->node] != 0)
+                       possible_cpus_comp_vect++;
+       }
+
+       dd->comp_vect_possible_cpus = possible_cpus_comp_vect;
+
+       /* Reserving CPUs for device completion vector */
+       for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+               curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask,
+                                               entry->comp_vect_affinity);
+               if (curr_cpu < 0)
+                       goto fail;
+
+               cpumask_set_cpu(curr_cpu, dev_comp_vect_mask);
+       }
+
+       hfi1_cdbg(AFFINITY,
+                 "[%s] Completion vector affinity CPU set(s) %*pbl",
+                 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi),
+                 cpumask_pr_args(dev_comp_vect_mask));
+
+       return 0;
+
+fail:
+       /*
+        * Drop one reservation per CPU obtained so far.
+        * NOTE(review): this decrements the highest counters in the node
+        * mask rather than the specific CPUs reserved above, and neither
+        * dev_comp_vect_mask nor dd->comp_vect_possible_cpus is reset here -
+        * confirm this is intended.
+        */
+       for (j = 0; j < i; j++)
+               per_cpu_affinity_put_max(&entry->comp_vect_mask,
+                                        entry->comp_vect_affinity);
+
+       return curr_cpu;
+}
+
+/*
+ * Give back the completion vector CPUs reserved for a device.
+ *
+ * It assumes dd->comp_vect_possible_cpus is available; a value of 0 means
+ * nothing is reserved and the function returns immediately.  For each
+ * reserved CPU the node's per-CPU counter is decremented and the CPU is
+ * cleared from the device's completion vector mask.  The count is reset to
+ * 0 afterwards, so a second call does nothing.
+ */
+static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
+                                            struct hfi1_affinity_node *entry)
+       __must_hold(&node_affinity.lock)
+{
+       int i, cpu;
+
+       lockdep_assert_held(&node_affinity.lock);
+       if (!dd->comp_vect_possible_cpus)
+               return;
+
+       for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+               /* Only CPUs still set in the device mask are candidates */
+               cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask,
+                                              entry->comp_vect_affinity);
+               /* Clearing CPU in device completion vector cpu mask */
+               if (cpu >= 0)
+                       cpumask_clear_cpu(cpu, &dd->comp_vect->mask);
+       }
+
+       dd->comp_vect_possible_cpus = 0;
+}
+
  /*
   * Interrupt affinity.
   *
@@ -277,7 +619,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
         int node = pcibus_to_node(dd->pcidev->bus);
         struct hfi1_affinity_node *entry;
         const struct cpumask *local_mask;
-       int curr_cpu, possible, i;
+       int curr_cpu, possible, i, ret;
+       bool new_entry = false;
  
         if (node < 0)
                 node = numa_node_id();
@@ -299,11 +642,14 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
                 if (!entry) {
                         dd_dev_err(dd,
                                    "Unable to allocate global affinity node\n");
-                       mutex_unlock(&node_affinity.lock);
-                       return -ENOMEM;
+                       ret = -ENOMEM;
+                       goto fail;
                 }
+               new_entry = true;
+
                 init_cpu_mask_set(&entry->def_intr);
                 init_cpu_mask_set(&entry->rcv_intr);
+               cpumask_clear(&entry->comp_vect_mask);
                 cpumask_clear(&entry->general_intr_mask);
                 /* Use the "real" cpu mask of this node as the default */
                 cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
@@ -356,10 +702,64 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
                                              &entry->general_intr_mask);
                 }
  
-               node_affinity_add_tail(entry);
+               /* Determine completion vector CPUs for the entire node */
+               cpumask_and(&entry->comp_vect_mask,
+                           &node_affinity.real_cpu_mask, local_mask);
+               cpumask_andnot(&entry->comp_vect_mask,
+                              &entry->comp_vect_mask,
+                              &entry->rcv_intr.mask);
+               cpumask_andnot(&entry->comp_vect_mask,
+                              &entry->comp_vect_mask,
+                              &entry->general_intr_mask);
+
+               /*
+                * If there ends up being 0 CPU cores leftover for completion
+                * vectors, use the same CPU core as the general/control
+                * context.
+                */
+               if (cpumask_weight(&entry->comp_vect_mask) == 0)
+                       cpumask_copy(&entry->comp_vect_mask,
+                                    &entry->general_intr_mask);
         }
+
+       ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry);
+       if (ret < 0)
+               goto fail;
+
+       if (new_entry)
+               node_affinity_add_tail(entry);
+
         mutex_unlock(&node_affinity.lock);
+
         return 0;
+
+fail:
+       if (new_entry)
+               node_affinity_destroy(entry);
+       mutex_unlock(&node_affinity.lock);
+       return ret;
+}
+
+/*
+ * Undo the completion vector CPU reservation made for a device.
+ *
+ * Does nothing when dd->node is negative.  Otherwise the affinity node for
+ * dd->node is looked up under node_affinity.lock and, if found, the
+ * device's completion vector CPUs are released.  dd->node is set to -1 on
+ * the way out, so a repeated call returns immediately.
+ */
+void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
+{
+       struct hfi1_affinity_node *entry;
+
+       if (dd->node < 0)
+               return;
+
+       mutex_lock(&node_affinity.lock);
+       entry = node_affinity_lookup(dd->node);
+       if (!entry)
+               goto unlock;
+
+       /*
+        * Free device completion vector CPUs to be used by future
+        * completion vectors
+        */
+       _dev_comp_vect_cpu_mask_clean_up(dd, entry);
+unlock:
+       mutex_unlock(&node_affinity.lock);
+       dd->node = -1;
  }
  
  /*
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h

index 2a1e374169c0aa3c3fff087516e2a501448372f0..6a7e6ea4e42607ce9c2653121b51538274c01d16 100644 (file)
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -1,5 +1,5 @@
  /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
   *
   * This file is provided under a dual BSD/GPLv2 license.  When using or
   * redistributing this file, you may do so under either license.
@@ -98,9 +98,11 @@ void hfi1_put_proc_affinity(int cpu);
  
  struct hfi1_affinity_node {
         int node;
+       u16 __percpu *comp_vect_affinity;
         struct cpu_mask_set def_intr;
         struct cpu_mask_set rcv_intr;
         struct cpumask general_intr_mask;
+       struct cpumask comp_vect_mask;
         struct list_head list;
  };
  
@@ -116,7 +118,11 @@ struct hfi1_affinity_node_list {
  };
  
  int node_affinity_init(void);
-void node_affinity_destroy(void);
+void node_affinity_destroy_all(void);
  extern struct hfi1_affinity_node_list node_affinity;
+void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd);
+int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect);
+int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd);
+void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd);
  
  #endif /* _HFI1_AFFINITY_H */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c

index 0fab6df0a3450490510da60b7f05e9de4fdd49cb..46e9e4ffcba4659941f9a5fbe953d3d94b304cda 100644 (file)
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -15233,6 +15233,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
         if (ret)
                 goto bail_cleanup;
  
+       ret = hfi1_comp_vectors_set_up(dd);
+       if (ret)
+               goto bail_clear_intr;
+
         /* set up LCB access - must be after set_up_interrupts() */
         init_lcb_access(dd);
  
@@ -15275,6 +15279,7 @@ bail_free_rcverr:
  bail_free_cntrs:
         free_cntrs(dd);
  bail_clear_intr:
+       hfi1_comp_vectors_clean_up(dd);
         hfi1_clean_up_interrupts(dd);
  bail_cleanup:
         hfi1_pcie_ddcleanup(dd);
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h

index 9cd758ce776428367789db58e2b49084518105c7..dd84238c1aac9fc7cdbdb8f330e46a7682faba65 100644 (file)
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1263,6 +1263,9 @@ struct hfi1_devdata {
  
         /* Save the enabled LCB error bits */
         u64 lcb_err_en;
+       struct cpu_mask_set *comp_vect;
+       int *comp_vect_mappings;
+       u32 comp_vect_possible_cpus;
  
         /*
          * Capability to have different send engines simply by changing a
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c

index 790542ce89a5cb44ac0389feb4b9e057a08dfb92..5d1adfc450d3b419da3c6de355d9c98e17307dc1 100644 (file)
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,5 +1,5 @@
  /*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
   *
   * This file is provided under a dual BSD/GPLv2 license.  When using or
   * redistributing this file, you may do so under either license.
@@ -1244,6 +1244,8 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd)
         dd->rcv_limit     = NULL;
         dd->send_schedule = NULL;
         dd->tx_opstats    = NULL;
+       kfree(dd->comp_vect);
+       dd->comp_vect = NULL;
         sdma_clean(dd, dd->num_sdma);
         rvt_dealloc_device(&dd->verbs_dev.rdi);
  }
@@ -1300,6 +1302,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
                 dd->unit = ret;
                 list_add(&dd->list, &hfi1_dev_list);
         }
+       dd->node = -1;
  
         spin_unlock_irqrestore(&hfi1_devs_lock, flags);
         idr_preload_end();
@@ -1352,6 +1355,12 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
                 goto bail;
         }
  
+       dd->comp_vect = kzalloc(sizeof(*dd->comp_vect), GFP_KERNEL);
+       if (!dd->comp_vect) {
+               ret = -ENOMEM;
+               goto bail;
+       }
+
         kobject_init(&dd->kobj, &hfi1_devdata_type);
         return dd;
  
@@ -1521,7 +1530,7 @@ module_init(hfi1_mod_init);
  static void __exit hfi1_mod_cleanup(void)
  {
         pci_unregister_driver(&hfi1_pci_driver);
-       node_affinity_destroy();
+       node_affinity_destroy_all();
         hfi1_wss_exit();
         hfi1_dbg_exit();
  
@@ -1605,6 +1614,8 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
  static void postinit_cleanup(struct hfi1_devdata *dd)
  {
         hfi1_start_cleanup(dd);
+       hfi1_comp_vectors_clean_up(dd);
+       hfi1_dev_affinity_clean_up(dd);
  
         hfi1_pcie_ddcleanup(dd);
         hfi1_pcie_cleanup(dd->pcidev);
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c

index 89bd9851065b8b2d2e0b13bb8fee8e477e3f17a6..332b9b7c554ad6e73a3f578c243dcc463a149f84 100644 (file)
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -1,5 +1,5 @@
  /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
   *
   * This file is provided under a dual BSD/GPLv2 license.  When using or
   * redistributing this file, you may do so under either license.
@@ -374,6 +374,7 @@ const char *print_u32_array(
         return ret;
  }
  
+__hfi1_trace_fn(AFFINITY);
  __hfi1_trace_fn(PKT);
  __hfi1_trace_fn(PROC);
  __hfi1_trace_fn(SDMA);
diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h

index 0e7d929530c5b005dbd87873741e250f397f9312..e62171fb7379f7ee63960109f7b6bb580b974f21 100644 (file)
--- a/drivers/infiniband/hw/hfi1/trace_dbg.h
+++ b/drivers/infiniband/hw/hfi1/trace_dbg.h
@@ -1,5 +1,5 @@
  /*
-* Copyright(c) 2015, 2016 Intel Corporation.
+* Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -113,6 +113,7 @@ void __hfi1_trace_##lvl(const char *func, char *fmt, ...)           \
   * hfi1_cdbg(LVL, fmt, ...); as well as take care of all
   * the debugfs stuff.
   */
+__hfi1_trace_def(AFFINITY);
  __hfi1_trace_def(PKT);
  __hfi1_trace_def(PROC);
  __hfi1_trace_def(SDMA);
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c

index 9554e912af98253a7f91c673e626059ca2aec742..fc2e44cde161348457f96dbc84fdde7ecae1f52c 100644 (file)
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -64,6 +64,7 @@
  #include "debugfs.h"
  #include "vnic.h"
  #include "fault.h"
+#include "affinity.h"
  
  static unsigned int hfi1_lkey_table_size = 16;
  module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -1934,11 +1935,11 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
         dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
         dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
         dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
+       dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup =
+                                               hfi1_comp_vect_mappings_lookup;
  
         /* completeion queue */
-       snprintf(dd->verbs_dev.rdi.dparms.cq_name,
-                sizeof(dd->verbs_dev.rdi.dparms.cq_name),
-                "hfi1_cq%d", dd->unit);
+       dd->verbs_dev.rdi.ibdev.num_comp_vectors = dd->comp_vect_possible_cpus;
         dd->verbs_dev.rdi.dparms.node = dd->node;
  
         /* misc settings */
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c

index 3977abbc83ad608ef4af4c63961cc47d91d583f7..14b4057a2b8f95306c322d6b3c5a98a77ac6f6a7 100644 (file)
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012 - 2018 Intel Corporation.  All rights reserved.
   * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
   * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
   *
@@ -1631,10 +1631,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
         dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
         dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
  
-       snprintf(dd->verbs_dev.rdi.dparms.cq_name,
-                sizeof(dd->verbs_dev.rdi.dparms.cq_name),
-                "qib_cq%d", dd->unit);
-
         qib_fill_device_attr(dd);
  
         ppd = dd->pport;
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c

index 340c17aba3b0771ce2a7fb467cb7f6f8588e900a..4f1544ad4affb5e48290c6ed94ba2b5f814a5888 100644 (file)
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -1,5 +1,5 @@
  /*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
   *
   * This file is provided under a dual BSD/GPLv2 license.  When using or
   * redistributing this file, you may do so under either license.
@@ -47,11 +47,12 @@
  
  #include <linux/slab.h>
  #include <linux/vmalloc.h>
-#include <linux/kthread.h>
  #include "cq.h"
  #include "vt.h"
  #include "trace.h"
  
+static struct workqueue_struct *comp_vector_wq;
+
  /**
   * rvt_cq_enter - add a new entry to the completion queue
   * @cq: completion queue
@@ -120,27 +121,21 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
         if (cq->notify == IB_CQ_NEXT_COMP ||
             (cq->notify == IB_CQ_SOLICITED &&
              (solicited || entry->status != IB_WC_SUCCESS))) {
-               struct kthread_worker *worker;
-
                 /*
                  * This will cause send_complete() to be called in
                  * another thread.
                  */
-               rcu_read_lock();
-               worker = rcu_dereference(cq->rdi->worker);
-               if (likely(worker)) {
-                       cq->notify = RVT_CQ_NONE;
-                       cq->triggered++;
-                       kthread_queue_work(worker, &cq->comptask);
-               }
-               rcu_read_unlock();
+               cq->notify = RVT_CQ_NONE;
+               cq->triggered++;
+               queue_work_on(cq->comp_vector_cpu, comp_vector_wq,
+                             &cq->comptask);
         }
  
         spin_unlock_irqrestore(&cq->lock, flags);
  }
  EXPORT_SYMBOL(rvt_cq_enter);
  
-static void send_complete(struct kthread_work *work)
+static void send_complete(struct work_struct *work)
  {
         struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);
  
@@ -192,6 +187,7 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
         struct ib_cq *ret;
         u32 sz;
         unsigned int entries = attr->cqe;
+       int comp_vector = attr->comp_vector;
  
         if (attr->flags)
                 return ERR_PTR(-EINVAL);
@@ -199,6 +195,11 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
         if (entries < 1 || entries > rdi->dparms.props.max_cqe)
                 return ERR_PTR(-EINVAL);
  
+       if (comp_vector < 0)
+               comp_vector = 0;
+
+       comp_vector = comp_vector % rdi->ibdev.num_comp_vectors;
+
         /* Allocate the completion queue structure. */
         cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, rdi->dparms.node);
         if (!cq)
@@ -267,14 +268,22 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
          * an error.
          */
         cq->rdi = rdi;
+       if (rdi->driver_f.comp_vect_cpu_lookup)
+               cq->comp_vector_cpu =
+                       rdi->driver_f.comp_vect_cpu_lookup(rdi, comp_vector);
+       else
+               cq->comp_vector_cpu =
+                       cpumask_first(cpumask_of_node(rdi->dparms.node));
+
         cq->ibcq.cqe = entries;
         cq->notify = RVT_CQ_NONE;
         spin_lock_init(&cq->lock);
-       kthread_init_work(&cq->comptask, send_complete);
+       INIT_WORK(&cq->comptask, send_complete);
         cq->queue = wc;
  
         ret = &cq->ibcq;
  
+       trace_rvt_create_cq(cq, attr);
         goto done;
  
  bail_ip:
@@ -300,7 +309,7 @@ int rvt_destroy_cq(struct ib_cq *ibcq)
         struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
         struct rvt_dev_info *rdi = cq->rdi;
  
-       kthread_flush_work(&cq->comptask);
+       flush_work(&cq->comptask);
         spin_lock_irq(&rdi->n_cqs_lock);
         rdi->n_cqs_allocated--;
         spin_unlock_irq(&rdi->n_cqs_lock);
@@ -510,24 +519,13 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
   *
   * Return: 0 on success
   */
-int rvt_driver_cq_init(struct rvt_dev_info *rdi)
+int rvt_driver_cq_init(void)
  {
-       int cpu;
-       struct kthread_worker *worker;
-
-       if (rcu_access_pointer(rdi->worker))
-               return 0;
-
-       spin_lock_init(&rdi->n_cqs_lock);
-
-       cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
-       worker = kthread_create_worker_on_cpu(cpu, 0,
-                                             "%s", rdi->dparms.cq_name);
-       if (IS_ERR(worker))
-               return PTR_ERR(worker);
+       comp_vector_wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_CPU_INTENSIVE,
+                                        0, "rdmavt_cq");
+       if (!comp_vector_wq)
+               return -ENOMEM;
  
-       set_user_nice(worker->task, MIN_NICE);
-       RCU_INIT_POINTER(rdi->worker, worker);
         return 0;
  }
  
@@ -535,23 +533,8 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi)
   * rvt_cq_exit - tear down cq reources
   * @rdi: rvt dev structure
   */
-void rvt_cq_exit(struct rvt_dev_info *rdi)
+void rvt_cq_exit(void)
  {
-       struct kthread_worker *worker;
-
-       if (!rcu_access_pointer(rdi->worker))
-               return;
-
-       spin_lock(&rdi->n_cqs_lock);
-       worker = rcu_dereference_protected(rdi->worker,
-                                          lockdep_is_held(&rdi->n_cqs_lock));
-       if (!worker) {
-               spin_unlock(&rdi->n_cqs_lock);
-               return;
-       }
-       RCU_INIT_POINTER(rdi->worker, NULL);
-       spin_unlock(&rdi->n_cqs_lock);
-       synchronize_rcu();
-
-       kthread_destroy_worker(worker);
+       destroy_workqueue(comp_vector_wq);
+       comp_vector_wq = NULL;
  }
diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h

index 6182c29eff66b8cf320497482b812311975f5665..72184b1c176ba8989fcf85c0a3147281949544ac 100644 (file)
--- a/drivers/infiniband/sw/rdmavt/cq.h
+++ b/drivers/infiniband/sw/rdmavt/cq.h
@@ -2,7 +2,7 @@
  #define DEF_RVTCQ_H
  
  /*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
   *
   * This file is provided under a dual BSD/GPLv2 license.  When using or
   * redistributing this file, you may do so under either license.
@@ -59,6 +59,6 @@ int rvt_destroy_cq(struct ib_cq *ibcq);
  int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
  int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
  int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-int rvt_driver_cq_init(struct rvt_dev_info *rdi);
-void rvt_cq_exit(struct rvt_dev_info *rdi);
+int rvt_driver_cq_init(void);
+void rvt_cq_exit(void);
  #endif          /* DEF_RVTCQ_H */
diff --git a/drivers/infiniband/sw/rdmavt/trace_cq.h b/drivers/infiniband/sw/rdmavt/trace_cq.h

index a315850aa9bbd3629baca84ea8c267a6f71ee2e9..df8e1adbef9df6abf06aad2460ec5a1a86f8e72a 100644 (file)
--- a/drivers/infiniband/sw/rdmavt/trace_cq.h
+++ b/drivers/infiniband/sw/rdmavt/trace_cq.h
@@ -1,5 +1,5 @@
  /*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
   *
   * This file is provided under a dual BSD/GPLv2 license.  When using or
   * redistributing this file, you may do so under either license.
@@ -71,6 +71,39 @@ __print_symbolic(opcode,                                      \
         wc_opcode_name(RECV),                                 \
         wc_opcode_name(RECV_RDMA_WITH_IMM))
  
+#define CQ_ATTR_PRINT \
+"[%s] user cq %s cqe %u comp_vector %d comp_vector_cpu %d flags %x"
+
+DECLARE_EVENT_CLASS(rvt_cq_template,
+                   TP_PROTO(struct rvt_cq *cq,
+                            const struct ib_cq_init_attr *attr),
+                   TP_ARGS(cq, attr),
+                   TP_STRUCT__entry(RDI_DEV_ENTRY(cq->rdi)
+                                    __field(struct rvt_mmap_info *, ip)
+                                    __field(unsigned int, cqe)
+                                    __field(int, comp_vector)
+                                    __field(int, comp_vector_cpu)
+                                    __field(u32, flags)
+                                    ),
+                   TP_fast_assign(RDI_DEV_ASSIGN(cq->rdi)
+                                  __entry->ip = cq->ip;
+                                  __entry->cqe = attr->cqe;
+                                  __entry->comp_vector = attr->comp_vector;
+                                  __entry->comp_vector_cpu =
+                                                       cq->comp_vector_cpu;
+                                  __entry->flags = attr->flags;
+                                  ),
+                   TP_printk(CQ_ATTR_PRINT, __get_str(dev),
+                             __entry->ip ? "true" : "false", __entry->cqe,
+                             __entry->comp_vector, __entry->comp_vector_cpu,
+                             __entry->flags
+                             )
+);
+
+DEFINE_EVENT(rvt_cq_template, rvt_create_cq,
+            TP_PROTO(struct rvt_cq *cq, const struct ib_cq_init_attr *attr),
+            TP_ARGS(cq, attr));
+
  #define CQ_PRN \
  "[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x"
  
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c

index 434199d0bc96ca149184605be52747aec6518393..17e4abc067afa3e7c6e5d4f09477615a367688f6 100644 (file)
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -1,5 +1,5 @@
  /*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
   *
   * This file is provided under a dual BSD/GPLv2 license.  When using or
   * redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@
  #include <linux/kernel.h>
  #include <linux/dma-mapping.h>
  #include "vt.h"
+#include "cq.h"
  #include "trace.h"
  
  #define RVT_UVERBS_ABI_VERSION 2
@@ -58,21 +59,18 @@ MODULE_DESCRIPTION("RDMA Verbs Transport Library");
  
  static int rvt_init(void)
  {
-       /*
-        * rdmavt does not need to do anything special when it starts up. All it
-        * needs to do is sit and wait until a driver attempts registration.
-        */
-       return 0;
+       int ret = rvt_driver_cq_init();
+
+       if (ret)
+               pr_err("Error in driver CQ init.\n");
+
+       return ret;
  }
  module_init(rvt_init);
  
  static void rvt_cleanup(void)
  {
-       /*
-        * Nothing to do at exit time either. The module won't be able to be
-        * removed until all drivers are gone which means all the dev structs
-        * are gone so there is really nothing to do.
-        */
+       rvt_cq_exit();
  }
  module_exit(rvt_cleanup);
  
@@ -777,11 +775,7 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
         }
  
         /* Completion queues */
-       ret = rvt_driver_cq_init(rdi);
-       if (ret) {
-               pr_err("Error in driver CQ init.\n");
-               goto bail_mr;
-       }
+       spin_lock_init(&rdi->n_cqs_lock);
  
         /* DMA Operations */
         rdi->ibdev.dev.dma_ops = rdi->ibdev.dev.dma_ops ? : &dma_virt_ops;
@@ -829,14 +823,15 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
                 (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
         rdi->ibdev.node_type = RDMA_NODE_IB_CA;
-       rdi->ibdev.num_comp_vectors = 1;
+       if (!rdi->ibdev.num_comp_vectors)
+               rdi->ibdev.num_comp_vectors = 1;
  
         rdi->ibdev.driver_id = driver_id;
         /* We are now good to announce we exist */
         ret =  ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback);
         if (ret) {
                 rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
-               goto bail_cq;
+               goto bail_mr;
         }
  
         rvt_create_mad_agents(rdi);
@@ -844,9 +839,6 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
         rvt_pr_info(rdi, "Registration with rdmavt done.\n");
         return ret;
  
-bail_cq:
-       rvt_cq_exit(rdi);
-
  bail_mr:
         rvt_mr_exit(rdi);
  
@@ -870,7 +862,6 @@ void rvt_unregister_device(struct rvt_dev_info *rdi)
         rvt_free_mad_agents(rdi);
  
         ib_unregister_device(&rdi->ibdev);
-       rvt_cq_exit(rdi);
         rvt_mr_exit(rdi);
         rvt_qp_exit(rdi);
  }
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h

index eec495e68823b066d457bd1822549123eb4e818d..e79229a0cf0147a8d86dfa9f48d02f972c454187 100644 (file)
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -2,7 +2,7 @@
  #define DEF_RDMA_VT_H
  
  /*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
   *
   * This file is provided under a dual BSD/GPLv2 license.  When using or
   * redistributing this file, you may do so under either license.
@@ -167,7 +167,6 @@ struct rvt_driver_params {
         int qpn_res_end;
         int nports;
         int npkeys;
-       char cq_name[RVT_CQN_MAX];
         int node;
         int psn_mask;
         int psn_shift;
@@ -347,6 +346,9 @@ struct rvt_driver_provided {
  
         /* Notify driver to restart rc */
         void (*notify_restart_rc)(struct rvt_qp *qp, u32 psn, int wait);
+
+       /* Get and return CPU to pin CQ processing thread */
+       int (*comp_vect_cpu_lookup)(struct rvt_dev_info *rdi, int comp_vect);
  };
  
  struct rvt_dev_info {
@@ -402,7 +404,6 @@ struct rvt_dev_info {
         spinlock_t pending_lock; /* protect pending mmap list */
  
         /* CQ */
-       struct kthread_worker __rcu *worker; /* per device cq worker */
         u32 n_cqs_allocated;    /* number of CQs allocated for device */
         spinlock_t n_cqs_lock; /* protect count of in use cqs */
  
diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h

index 51fd00b243d0104e379835c18104a3676addc526..75dc65c0bfb83d81b25fea05905373fa8d5d9e08 100644 (file)
--- a/include/rdma/rdmavt_cq.h
+++ b/include/rdma/rdmavt_cq.h
@@ -8,7 +8,7 @@
   *
   * GPL LICENSE SUMMARY
   *
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of version 2 of the GNU General Public License as
@@ -80,10 +80,11 @@ struct rvt_cq_wc {
   */
  struct rvt_cq {
         struct ib_cq ibcq;
-       struct kthread_work comptask;
+       struct work_struct comptask;
         spinlock_t lock; /* protect changes in this struct */
         u8 notify;
         u8 triggered;
+       int comp_vector_cpu;
         struct rvt_dev_info *rdi;
         struct rvt_cq_wc *queue;
         struct rvt_mmap_info *ip;
author	Sebastian Sanchez <sebastian.sanchez@intel.com>
	Wed, 2 May 2018 13:43:55 +0000 (06:43 -0700)
committer	Doug Ledford <dledford@redhat.com>
	Wed, 9 May 2018 19:53:30 +0000 (15:53 -0400)
drivers/infiniband/hw/hfi1/affinity.c		patch | blob | history
drivers/infiniband/hw/hfi1/affinity.h		patch | blob | history
drivers/infiniband/hw/hfi1/chip.c		patch | blob | history
drivers/infiniband/hw/hfi1/hfi.h		patch | blob | history
drivers/infiniband/hw/hfi1/init.c		patch | blob | history
drivers/infiniband/hw/hfi1/trace.c		patch | blob | history
drivers/infiniband/hw/hfi1/trace_dbg.h		patch | blob | history
drivers/infiniband/hw/hfi1/verbs.c		patch | blob | history
drivers/infiniband/hw/qib/qib_verbs.c		patch | blob | history
drivers/infiniband/sw/rdmavt/cq.c		patch | blob | history
drivers/infiniband/sw/rdmavt/cq.h		patch | blob | history
drivers/infiniband/sw/rdmavt/trace_cq.h		patch | blob | history
drivers/infiniband/sw/rdmavt/vt.c		patch | blob | history
include/rdma/rdma_vt.h		patch | blob | history
include/rdma/rdmavt_cq.h		patch | blob | history