staging/rdma/hfi1: Handle packets with invalid RHF on context 0
authorNiranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Wed, 11 Nov 2015 05:35:19 +0000 (00:35 -0500)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 20 Nov 2015 01:07:51 +0000 (17:07 -0800)
Context 0 (which handles the error packets) can potentially receive an invalid
RHF. Hence, it cannot depend on the RHF sequence number and can only use the
DMA_RTAIL mechanism. Detect such packets with an invalid RHF using the RHF
sequence counting mechanism and drop them.

As the DMA_RTAIL mechanism has performance penalties, do not use context 0 for
the performance-critical verbs path. Use context 0 for VL15 (MAD), multicast and
error packets.

Reviewed-by: Arthur Kepner <arthur.kepner@intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Dean Luick <dean.luick@intel.com>
Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/staging/rdma/hfi1/chip.c
drivers/staging/rdma/hfi1/driver.c
drivers/staging/rdma/hfi1/hfi.h
drivers/staging/rdma/hfi1/init.c

index 456704e9629a1e5385492a2460b6d4a4d8853d9d..dc6915947c78d6cae88a3b83ea546d7ff5cd1baa 100644 (file)
@@ -121,8 +121,8 @@ struct flag_table {
 #define SEC_SC_HALTED          0x4     /* per-context only */
 #define SEC_SPC_FREEZE         0x8     /* per-HFI only */
 
-#define VL15CTXT                  1
 #define MIN_KERNEL_KCTXTS         2
+#define FIRST_KERNEL_KCTXT        1
 #define NUM_MAP_REGS             32
 
 /* Bit offset into the GUID which carries HFI id information */
@@ -7780,8 +7780,8 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
                                        & RCV_TID_CTRL_TID_BASE_INDEX_MASK)
                                << RCV_TID_CTRL_TID_BASE_INDEX_SHIFT);
                write_kctxt_csr(dd, ctxt, RCV_TID_CTRL, reg);
-               if (ctxt == VL15CTXT)
-                       write_csr(dd, RCV_VL15, VL15CTXT);
+               if (ctxt == HFI1_CTRL_CTXT)
+                       write_csr(dd, RCV_VL15, HFI1_CTRL_CTXT);
        }
        if (op & HFI1_RCVCTRL_CTXT_DIS) {
                write_csr(dd, RCV_VL15, 0);
@@ -8908,7 +8908,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
        int first_general, last_general;
        int first_sdma, last_sdma;
        int first_rx, last_rx;
-       int first_cpu, restart_cpu, curr_cpu;
+       int first_cpu, curr_cpu;
        int rcv_cpu, sdma_cpu;
        int i, ret = 0, possible;
        int ht;
@@ -8947,22 +8947,19 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
                        topology_sibling_cpumask(cpumask_first(local_mask)));
        for (i = possible/ht; i < possible; i++)
                cpumask_clear_cpu(i, def);
-       /* reset possible */
-       possible = cpumask_weight(def);
        /* def now has full cores on chosen node*/
        first_cpu = cpumask_first(def);
        if (nr_cpu_ids >= first_cpu)
                first_cpu++;
-       restart_cpu = first_cpu;
-       curr_cpu = restart_cpu;
+       curr_cpu = first_cpu;
 
-       for (i = first_cpu; i < dd->n_krcv_queues + first_cpu; i++) {
+       /*  One context is reserved as control context */
+       for (i = first_cpu; i < dd->n_krcv_queues + first_cpu - 1; i++) {
                cpumask_clear_cpu(curr_cpu, def);
                cpumask_set_cpu(curr_cpu, rcv);
-               if (curr_cpu >= possible)
-                       curr_cpu = restart_cpu;
-               else
-                       curr_cpu++;
+               curr_cpu = cpumask_next(curr_cpu, def);
+               if (curr_cpu >= nr_cpu_ids)
+                       break;
        }
        /* def mask has non-rcv, rcv has recv mask */
        rcv_cpu = cpumask_first(rcv);
@@ -9062,12 +9059,20 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
                        if (sdma_cpu >= nr_cpu_ids)
                                sdma_cpu = cpumask_first(def);
                } else if (handler == receive_context_interrupt) {
-                       dd_dev_info(dd, "rcv ctxt %d cpu %d\n",
-                               rcd->ctxt, rcv_cpu);
-                       cpumask_set_cpu(rcv_cpu, dd->msix_entries[i].mask);
-                       rcv_cpu = cpumask_next(rcv_cpu, rcv);
-                       if (rcv_cpu >= nr_cpu_ids)
-                               rcv_cpu = cpumask_first(rcv);
+                       dd_dev_info(dd, "rcv ctxt %d cpu %d\n", rcd->ctxt,
+                                   (rcd->ctxt == HFI1_CTRL_CTXT) ?
+                                           cpumask_first(def) : rcv_cpu);
+                       if (rcd->ctxt == HFI1_CTRL_CTXT) {
+                               /* map to first default */
+                               cpumask_set_cpu(cpumask_first(def),
+                                               dd->msix_entries[i].mask);
+                       } else {
+                               cpumask_set_cpu(rcv_cpu,
+                                               dd->msix_entries[i].mask);
+                               rcv_cpu = cpumask_next(rcv_cpu, rcv);
+                               if (rcv_cpu >= nr_cpu_ids)
+                                       rcv_cpu = cpumask_first(rcv);
+                       }
                } else {
                        /* otherwise first def */
                        dd_dev_info(dd, "%s cpu %d\n",
@@ -9200,11 +9205,18 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
        /*
         * Kernel contexts: (to be fixed later):
         * - min or 2 or 1 context/numa
-        * - Context 0 - default/errors
-        * - Context 1 - VL15
+        * - Context 0 - control context (VL15/multicast/error)
+        * - Context 1 - default context
         */
        if (n_krcvqs)
-               num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS;
+               /*
+                * Don't count context 0 in n_krcvqs since
+                * it isn't used for normal verbs traffic.
+                *
+                * krcvqs will reflect number of kernel
+                * receive contexts above 0.
+                */
+               num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS - 1;
        else
                num_kernel_contexts = num_online_nodes();
        num_kernel_contexts =
@@ -10053,12 +10065,6 @@ static void init_qpmap_table(struct hfi1_devdata *dd,
        u64 ctxt = first_ctxt;
 
        for (i = 0; i < 256;) {
-               if (ctxt == VL15CTXT) {
-                       ctxt++;
-                       if (ctxt > last_ctxt)
-                               ctxt = first_ctxt;
-                       continue;
-               }
                reg |= ctxt << (8 * (i % 8));
                i++;
                ctxt++;
@@ -10171,19 +10177,13 @@ static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt)
        /* Enable RSM */
        add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
        kfree(rsmmap);
-       /* map everything else (non-VL15) to context 0 */
-       init_qpmap_table(
-               dd,
-               0,
-               0);
+       /* map everything else to first context */
+       init_qpmap_table(dd, FIRST_KERNEL_KCTXT, MIN_KERNEL_KCTXTS - 1);
        dd->qos_shift = n + 1;
        return;
 bail:
        dd->qos_shift = 1;
-       init_qpmap_table(
-               dd,
-               dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0,
-               dd->n_krcv_queues - 1);
+       init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1);
 }
 
 static void init_rxe(struct hfi1_devdata *dd)
index 487d58778d700086bfba63e158cdbb0fbe8a79e5..4c52e785de68eefa48e2132a9a46ae93ea41db71 100644 (file)
@@ -509,28 +509,49 @@ static inline void init_ps_mdata(struct ps_mdata *mdata,
        mdata->maxcnt = packet->maxcnt;
        mdata->ps_head = packet->rhqoff;
 
-       if (HFI1_CAP_IS_KSET(DMA_RTAIL)) {
+       if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
                mdata->ps_tail = get_rcvhdrtail(rcd);
-               mdata->ps_seq = 0; /* not used with DMA_RTAIL */
+               if (rcd->ctxt == HFI1_CTRL_CTXT)
+                       mdata->ps_seq = rcd->seq_cnt;
+               else
+                       mdata->ps_seq = 0; /* not used with DMA_RTAIL */
        } else {
                mdata->ps_tail = 0; /* used only with DMA_RTAIL*/
                mdata->ps_seq = rcd->seq_cnt;
        }
 }
 
-static inline int ps_done(struct ps_mdata *mdata, u64 rhf)
+static inline int ps_done(struct ps_mdata *mdata, u64 rhf,
+                         struct hfi1_ctxtdata *rcd)
 {
-       if (HFI1_CAP_IS_KSET(DMA_RTAIL))
+       if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
                return mdata->ps_head == mdata->ps_tail;
        return mdata->ps_seq != rhf_rcv_seq(rhf);
 }
 
-static inline void update_ps_mdata(struct ps_mdata *mdata)
+static inline int ps_skip(struct ps_mdata *mdata, u64 rhf,
+                         struct hfi1_ctxtdata *rcd)
+{
+       /*
+        * Control context can potentially receive an invalid rhf.
+        * Drop such packets.
+        */
+       if ((rcd->ctxt == HFI1_CTRL_CTXT) && (mdata->ps_head != mdata->ps_tail))
+               return mdata->ps_seq != rhf_rcv_seq(rhf);
+
+       return 0;
+}
+
+static inline void update_ps_mdata(struct ps_mdata *mdata,
+                                  struct hfi1_ctxtdata *rcd)
 {
        mdata->ps_head += mdata->rsize;
        if (mdata->ps_head >= mdata->maxcnt)
                mdata->ps_head = 0;
-       if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) {
+
+       /* Control context must do seq counting */
+       if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
+           (rcd->ctxt == HFI1_CTRL_CTXT)) {
                if (++mdata->ps_seq > 13)
                        mdata->ps_seq = 1;
        }
@@ -566,9 +587,12 @@ static void prescan_rxq(struct hfi1_packet *packet)
                int is_ecn = 0;
                u8 lnh;
 
-               if (ps_done(&mdata, rhf))
+               if (ps_done(&mdata, rhf, rcd))
                        break;
 
+               if (ps_skip(&mdata, rhf, rcd))
+                       goto next;
+
                if (etype != RHF_RCV_TYPE_IB)
                        goto next;
 
@@ -606,8 +630,34 @@ static void prescan_rxq(struct hfi1_packet *packet)
                bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK);
                ohdr->bth[1] = cpu_to_be32(bth1);
 next:
-               update_ps_mdata(&mdata);
+               update_ps_mdata(&mdata, rcd);
+       }
+}
+
+static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
+{
+       int ret = RCV_PKT_OK;
+
+       /* Set up for the next packet */
+       packet->rhqoff += packet->rsize;
+       if (packet->rhqoff >= packet->maxcnt)
+               packet->rhqoff = 0;
+
+       packet->numpkt++;
+       if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) {
+               if (thread) {
+                       cond_resched();
+               } else {
+                       ret = RCV_PKT_LIMIT;
+                       this_cpu_inc(*packet->rcd->dd->rcv_limit);
+               }
        }
+
+       packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
+                                    packet->rcd->dd->rhf_offset;
+       packet->rhf = rhf_to_cpu(packet->rhf_addr);
+
+       return ret;
 }
 #endif /* CONFIG_PRESCAN_RXQ */
 
@@ -784,7 +834,6 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
 
        while (last == RCV_PKT_OK) {
                last = process_rcv_packet(&packet, thread);
-               hdrqtail = get_rcvhdrtail(rcd);
                if (packet.rhqoff == hdrqtail)
                        last = RCV_PKT_DONE;
                process_rcv_update(last, &packet);
@@ -799,7 +848,7 @@ static inline void set_all_nodma_rtail(struct hfi1_devdata *dd)
 {
        int i;
 
-       for (i = 0; i < dd->first_user_ctxt; i++)
+       for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
                dd->rcd[i]->do_interrupt =
                        &handle_receive_interrupt_nodma_rtail;
 }
@@ -808,7 +857,7 @@ static inline void set_all_dma_rtail(struct hfi1_devdata *dd)
 {
        int i;
 
-       for (i = 0; i < dd->first_user_ctxt; i++)
+       for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
                dd->rcd[i]->do_interrupt =
                        &handle_receive_interrupt_dma_rtail;
 }
@@ -824,12 +873,16 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 {
        struct hfi1_devdata *dd = rcd->dd;
        u32 hdrqtail;
-       int last = RCV_PKT_OK, needset = 1;
+       int needset, last = RCV_PKT_OK;
        struct hfi1_packet packet;
+       int skip_pkt = 0;
+
+       /* Control context will always use the slow path interrupt handler */
+       needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1;
 
        init_packet(rcd, &packet);
 
-       if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) {
+       if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
                u32 seq = rhf_rcv_seq(packet.rhf);
 
                if (seq != rcd->seq_cnt) {
@@ -844,6 +897,17 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
                        goto bail;
                }
                smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
+
+               /*
+                * Control context can potentially receive an invalid
+                * rhf. Drop such packets.
+                */
+               if (rcd->ctxt == HFI1_CTRL_CTXT) {
+                       u32 seq = rhf_rcv_seq(packet.rhf);
+
+                       if (seq != rcd->seq_cnt)
+                               skip_pkt = 1;
+               }
        }
 
        prescan_rxq(&packet);
@@ -861,11 +925,14 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
                                          dd->rhf_offset;
                        packet.rhf = rhf_to_cpu(packet.rhf_addr);
 
+               } else if (skip_pkt) {
+                       last = skip_rcv_packet(&packet, thread);
+                       skip_pkt = 0;
                } else {
                        last = process_rcv_packet(&packet, thread);
                }
 
-               if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) {
+               if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
                        u32 seq = rhf_rcv_seq(packet.rhf);
 
                        if (++rcd->seq_cnt > 13)
@@ -881,6 +948,19 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
                } else {
                        if (packet.rhqoff == hdrqtail)
                                last = RCV_PKT_DONE;
+                       /*
+                        * Control context can potentially receive an invalid
+                        * rhf. Drop such packets.
+                        */
+                       if (rcd->ctxt == HFI1_CTRL_CTXT) {
+                               u32 seq = rhf_rcv_seq(packet.rhf);
+
+                               if (++rcd->seq_cnt > 13)
+                                       rcd->seq_cnt = 1;
+                               if (!last && (seq != rcd->seq_cnt))
+                                       skip_pkt = 1;
+                       }
+
                        if (needset) {
                                dd_dev_info(dd,
                                            "Switching to DMA_RTAIL\n");
index b048cf6960d9a201df3d598006e0031283e89bff..54ed6b36c1a78ed63daa9f20c9cd387bafbfdd47 100644 (file)
@@ -99,6 +99,12 @@ extern unsigned long hfi1_cap_mask;
 #define HFI1_MISC_GET() ((hfi1_cap_mask >> HFI1_CAP_MISC_SHIFT) & \
                        HFI1_CAP_MISC_MASK)
 
+/*
+ * Control context is always 0 and handles the error packets.
+ * It also handles the VL15 and multicast packets.
+ */
+#define HFI1_CTRL_CTXT    0
+
 /*
  * per driver stats, either not device nor port-specific, or
  * summed over all of the devices and ports.
@@ -234,7 +240,7 @@ struct hfi1_ctxtdata {
        /* chip offset of PIO buffers for this ctxt */
        u32 piobufs;
        /* per-context configuration flags */
-       u16 flags;
+       u32 flags;
        /* per-context event flags for fileops/intr communication */
        unsigned long event_flags;
        /* WAIT_RCV that timed out, no interrupt */
index c17cef6938fb3254d33337f572322cdbf2cbb1f2..1c8286f4c00cab1f7ef9da61569866753ced511f 100644 (file)
@@ -90,7 +90,7 @@ MODULE_PARM_DESC(
 u8 krcvqs[RXE_NUM_DATA_VL];
 int krcvqsset;
 module_param_array(krcvqs, byte, &krcvqsset, S_IRUGO);
-MODULE_PARM_DESC(krcvqs, "Array of the number of kernel receive queues by VL");
+MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL");
 
 /* computed based on above array */
 unsigned n_krcvqs;
@@ -130,6 +130,9 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
        int ret;
        int local_node_id = pcibus_to_node(dd->pcidev->bus);
 
+       /* Control context must always be 0 */
+       BUILD_BUG_ON(HFI1_CTRL_CTXT != 0);
+
        if (local_node_id < 0)
                local_node_id = numa_node_id();
        dd->assigned_node_id = local_node_id;
@@ -159,6 +162,10 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
                        HFI1_CAP_KGET(NODROP_RHQ_FULL) |
                        HFI1_CAP_KGET(NODROP_EGR_FULL) |
                        HFI1_CAP_KGET(DMA_RTAIL);
+
+               /* Control context must use DMA_RTAIL */
+               if (rcd->ctxt == HFI1_CTRL_CTXT)
+                       rcd->flags |= HFI1_CAP_DMA_RTAIL;
                rcd->seq_cnt = 1;
 
                rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);