IB/hfi1: Add receive fault injection feature
authorDon Hiatt <don.hiatt@intel.com>
Tue, 21 Mar 2017 00:26:14 +0000 (17:26 -0700)
committerDoug Ledford <dledford@redhat.com>
Wed, 5 Apr 2017 18:45:09 +0000 (14:45 -0400)
Add fault injection capability:
  - Drop packets unconditionally (fault_by_packet)
  - Drop packets based on opcode (fault_by_opcode)

This feature is only compiled in when the global
CONFIG_FAULT_INJECTION config option is enabled.

The faulting traces have been added:
  - misc/fault_opcode
  - misc/fault_packet

See 'Documentation/fault-injection/fault-injection.txt'
for details.

Examples:
  - Dropping packets by opcode:
    /sys/kernel/debug/hfi1/hfi1_X/fault_opcode
# Enable fault
echo Y > fault_by_opcode
# Set probability of dropping (0-100%)
echo 25 > probability
# Set opcode
echo 0x64 > opcode
# Number of times to fault
echo 3 > times
# An optional mask allows you to fault
# a range of opcodes
echo 0xf0 > mask
    /sys/kernel/debug/hfi1/hfi1_X/fault_opcode/fault_stats
    contains a value in parentheses to indicate
    the number of times each opcode was dropped.

  - Dropping packets unconditionally
    /sys/kernel/debug/hfi1/hfi1_X/fault_packet
# Enable fault
echo Y > fault_by_packet
    /sys/kernel/debug/hfi1/hfi1_X/fault_packet/fault_stats
    contains the number of packets dropped.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Don Hiatt <don.hiatt@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/debugfs.c
drivers/infiniband/hw/hfi1/debugfs.h
drivers/infiniband/hw/hfi1/driver.c
drivers/infiniband/hw/hfi1/trace_misc.h
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/hfi1/verbs.h

index 7fe9dd885746e68100da0d1e3db965ff2127bbbe..cac6d5256f4085bf9e10eed17b41ba530fb3b7f6 100644 (file)
 #include <linux/export.h>
 #include <linux/module.h>
 #include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ratelimit.h>
+#include <linux/fault-inject.h>
 
 #include "hfi.h"
+#include "trace.h"
 #include "debugfs.h"
 #include "device.h"
 #include "qp.h"
@@ -1063,6 +1067,217 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
 DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
 DEBUGFS_FILE_OPS(sdma_cpu_list);
 
+#ifdef CONFIG_FAULT_INJECTION
+/*
+ * Start callback of the fault_stats sequence: the position itself is handed
+ * back as the iterator cookie while it still indexes the opcode stats table,
+ * NULL once it has run past the end.
+ */
+static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+       /* only used inside ARRAY_SIZE(); the pointer is never dereferenced */
+       struct hfi1_opcode_stats_perctx *opstats;
+
+       return (*pos < ARRAY_SIZE(opstats->stats)) ? pos : NULL;
+}
+
+/*
+ * Next callback of the fault_stats sequence: advance the position by one and
+ * return it, or NULL when the new position is past the opcode stats table.
+ */
+static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       /* only used inside ARRAY_SIZE(); the pointer is never dereferenced */
+       struct hfi1_opcode_stats_perctx *opstats;
+
+       *pos += 1;
+       if (*pos < ARRAY_SIZE(opstats->stats))
+               return pos;
+       return NULL;
+}
+
+static void _fault_stats_seq_stop(struct seq_file *s, void *v)
+{ /* nothing to undo: the start/next callbacks acquire no resources */
+}
+
+/*
+ * Print the fault_stats line for the opcode selected by the iterator
+ * position passed in @v.
+ *
+ * Packet and byte counts are summed over every populated receive context
+ * below dd->first_user_ctxt.  SEQ_SKIP is returned when the opcode saw no
+ * traffic at all, or when neither an rx nor a tx fault was recorded for it.
+ */
+static int _fault_stats_seq_show(struct seq_file *s, void *v)
+{
+       struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+       struct hfi1_devdata *dd = dd_from_dev(ibd);
+       loff_t *spos = v;
+       loff_t i = *spos, j;
+       u64 n_packets = 0, n_bytes = 0;
+       u64 rx_faults, tx_faults;
+
+       for (j = 0; j < dd->first_user_ctxt; j++) {
+               if (dd->rcd[j]) {
+                       n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
+                       n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
+               }
+       }
+       if (!(n_packets || n_bytes))
+               return SEQ_SKIP;
+
+       rx_faults = ibd->fault_opcode->n_rxfaults[i];
+       tx_faults = ibd->fault_opcode->n_txfaults[i];
+       if (!(rx_faults || tx_faults))
+               return SEQ_SKIP;
+
+       seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
+                  (unsigned long long)n_packets,
+                  (unsigned long long)n_bytes,
+                  (unsigned long long)rx_faults,
+                  (unsigned long long)tx_faults);
+       return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(fault_stats);
+DEBUGFS_SEQ_FILE_OPEN(fault_stats);
+DEBUGFS_FILE_OPS(fault_stats);
+
+/*
+ * Remove the "fault_opcode" debugfs directory and free the opcode fault
+ * state hanging off @ibd.
+ *
+ * This may be reached when the state does not exist: the result of
+ * fault_init_debugfs() is not checked by its caller, and fault_init_debugfs()
+ * itself tears the opcode state down when the packet setup fails, after
+ * which the regular exit path runs again.  Treat a NULL pointer as
+ * "nothing to do" instead of dereferencing it.
+ */
+static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd)
+{
+       if (!ibd->fault_opcode)
+               return;
+
+       debugfs_remove_recursive(ibd->fault_opcode->dir);
+       kfree(ibd->fault_opcode);
+       ibd->fault_opcode = NULL;
+}
+
+/*
+ * Allocate the opcode fault state for @ibd and publish its controls under
+ * a "fault_opcode" directory below the device's debugfs directory.
+ *
+ * Returns 0 on success, -ENOMEM when the allocation or one of the control
+ * files fails, -ENOENT when the fault attribute directory cannot be created.
+ * On every failure path ibd->fault_opcode is left NULL.
+ */
+static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd)
+{
+       struct dentry *parent = ibd->hfi1_ibdev_dbg;
+
+       ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL);
+       if (!ibd->fault_opcode)
+               return -ENOMEM;
+
+       ibd->fault_opcode->attr.interval = 1;
+       ibd->fault_opcode->attr.require_end = ULONG_MAX;
+       ibd->fault_opcode->attr.stacktrace_depth = 32;
+       ibd->fault_opcode->attr.dname = NULL;
+       ibd->fault_opcode->attr.verbose = 0;
+       ibd->fault_opcode->fault_by_opcode = false;
+       ibd->fault_opcode->opcode = 0;
+       /* default mask compares all eight opcode bits */
+       ibd->fault_opcode->mask = 0xff;
+
+       ibd->fault_opcode->dir =
+               fault_create_debugfs_attr("fault_opcode",
+                                         parent,
+                                         &ibd->fault_opcode->attr);
+       if (IS_ERR(ibd->fault_opcode->dir)) {
+               kfree(ibd->fault_opcode);
+               /*
+                * Clear the pointer: the fault hooks and the exit path test
+                * it for NULL and must not see freed memory.
+                */
+               ibd->fault_opcode = NULL;
+               return -ENOENT;
+       }
+
+       DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd);
+       if (!debugfs_create_bool("fault_by_opcode", 0600,
+                                ibd->fault_opcode->dir,
+                                &ibd->fault_opcode->fault_by_opcode))
+               goto fail;
+       if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir,
+                              &ibd->fault_opcode->opcode))
+               goto fail;
+       if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir,
+                              &ibd->fault_opcode->mask))
+               goto fail;
+
+       return 0;
+fail:
+       /* removes the directory, frees the state and clears the pointer */
+       fault_exit_opcode_debugfs(ibd);
+       return -ENOMEM;
+}
+
+/*
+ * Remove the "fault_packet" debugfs directory and free the packet fault
+ * state hanging off @ibd.
+ *
+ * The state may be absent (its setup can fail and the result of
+ * fault_init_debugfs() is not checked by its caller), so a NULL pointer is
+ * treated as "nothing to do" instead of being dereferenced.
+ */
+static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd)
+{
+       if (!ibd->fault_packet)
+               return;
+
+       debugfs_remove_recursive(ibd->fault_packet->dir);
+       kfree(ibd->fault_packet);
+       ibd->fault_packet = NULL;
+}
+
+/*
+ * Allocate the packet fault state for @ibd and publish its controls under
+ * a "fault_packet" directory below the device's debugfs directory.
+ *
+ * Returns 0 on success, -ENOMEM when the allocation or one of the control
+ * files fails, -ENOENT when the fault attribute directory cannot be created.
+ * On every failure path ibd->fault_packet is left NULL.
+ */
+static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd)
+{
+       struct dentry *parent = ibd->hfi1_ibdev_dbg;
+
+       ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL);
+       if (!ibd->fault_packet)
+               return -ENOMEM;
+
+       ibd->fault_packet->attr.interval = 1;
+       ibd->fault_packet->attr.require_end = ULONG_MAX;
+       ibd->fault_packet->attr.stacktrace_depth = 32;
+       ibd->fault_packet->attr.dname = NULL;
+       ibd->fault_packet->attr.verbose = 0;
+       ibd->fault_packet->fault_by_packet = false;
+
+       /*
+        * Expose the packet attr here, not the opcode one: this is the attr
+        * hfi1_dbg_fault_packet() hands to should_fail(), so the knobs in
+        * this directory must control it.
+        */
+       ibd->fault_packet->dir =
+               fault_create_debugfs_attr("fault_packet",
+                                         parent,
+                                         &ibd->fault_packet->attr);
+       if (IS_ERR(ibd->fault_packet->dir)) {
+               kfree(ibd->fault_packet);
+               /*
+                * Clear the pointer: the fault hook and the exit path test
+                * it for NULL and must not see freed memory.
+                */
+               ibd->fault_packet = NULL;
+               return -ENOENT;
+       }
+
+       if (!debugfs_create_bool("fault_by_packet", 0600,
+                                ibd->fault_packet->dir,
+                                &ibd->fault_packet->fault_by_packet))
+               goto fail;
+       if (!debugfs_create_u64("fault_stats", 0400,
+                               ibd->fault_packet->dir,
+                               &ibd->fault_packet->n_faults))
+               goto fail;
+
+       return 0;
+fail:
+       /* removes the directory, frees the state and clears the pointer */
+       fault_exit_packet_debugfs(ibd);
+       return -ENOMEM;
+}
+
+/*
+ * Tear down both fault injection debugfs trees of @ibd.
+ *
+ * Either state may be missing when its setup failed (the init result is not
+ * checked by the caller), so only tear down what actually exists.
+ */
+static void fault_exit_debugfs(struct hfi1_ibdev *ibd)
+{
+       if (ibd->fault_opcode)
+               fault_exit_opcode_debugfs(ibd);
+       if (ibd->fault_packet)
+               fault_exit_packet_debugfs(ibd);
+}
+
+/*
+ * Set up the opcode and then the packet fault injection debugfs trees.
+ *
+ * Returns 0 when both succeed, otherwise the error of the step that failed;
+ * a failure of the packet step undoes the opcode step.
+ */
+static int fault_init_debugfs(struct hfi1_ibdev *ibd)
+{
+       int ret = fault_init_opcode_debugfs(ibd);
+
+       if (!ret) {
+               ret = fault_init_packet_debugfs(ibd);
+               if (ret)
+                       fault_exit_opcode_debugfs(ibd);
+       }
+
+       return ret;
+}
+
+/*
+ * Decide whether a packet with @opcode on @qp should be dropped by the
+ * opcode fault injector.
+ *
+ * @qp:     queue pair the packet belongs to
+ * @opcode: opcode of the packet
+ * @rx:     true to account a hit as a receive fault, false as a send fault
+ *
+ * Returns true when fault-by-opcode is enabled, the masked opcode matches
+ * the configured one and should_fail() fires; the per-opcode fault counter
+ * is bumped and a trace event emitted in that case.  Returns false
+ * otherwise.
+ */
+bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
+{
+       bool ret = false;
+       struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
+
+       if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode)
+               return false;
+       /*
+        * The counters hold one slot per 8-bit opcode and the match below
+        * only looks at the low eight bits, so reject anything larger
+        * rather than index past the arrays.
+        */
+       if (opcode >= ARRAY_SIZE(ibd->fault_opcode->n_rxfaults))
+               return false;
+       if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask))
+               return false;
+       ret = should_fail(&ibd->fault_opcode->attr, 1);
+       if (ret) {
+               trace_hfi1_fault_opcode(qp, opcode);
+               if (rx)
+                       ibd->fault_opcode->n_rxfaults[opcode]++;
+               else
+                       ibd->fault_opcode->n_txfaults[opcode]++;
+       }
+       return ret;
+}
+
+/*
+ * Decide whether @packet should be dropped by the unconditional packet
+ * fault injector.
+ *
+ * Returns true when fault-by-packet is enabled and should_fail() fires; the
+ * fault counter is incremented and a trace event emitted in that case.
+ * Returns false otherwise.
+ */
+bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
+{
+       struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi;
+       struct hfi1_ibdev *ibd = dev_from_rdi(rdi);
+
+       if (!(ibd->fault_packet && ibd->fault_packet->fault_by_packet))
+               return false;
+
+       if (!should_fail(&ibd->fault_packet->attr, 1))
+               return false;
+
+       ++ibd->fault_packet->n_faults;
+       trace_hfi1_fault_packet(packet);
+       return true;
+}
+#endif
+
 void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
 {
        char name[sizeof("port0counters") + 1];
@@ -1112,12 +1327,19 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
                                            !port_cntr_ops[i].ops.write ?
                                            S_IRUGO : S_IRUGO | S_IWUSR);
                }
+
+#ifdef CONFIG_FAULT_INJECTION
+       fault_init_debugfs(ibd);
+#endif
 }
 
 void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
 {
        if (!hfi1_dbg_root)
                goto out;
+#ifdef CONFIG_FAULT_INJECTION
+       fault_exit_debugfs(ibd);
+#endif
        debugfs_remove(ibd->hfi1_ibdev_link);
        debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
 out:
index b6fb6814f1b881de72f4633143b9b822193c7072..70be5ca14736a0b793640b3645b3b7f726b563b5 100644 (file)
@@ -53,23 +53,68 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
 void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
 void hfi1_dbg_init(void);
 void hfi1_dbg_exit(void);
+
+#ifdef CONFIG_FAULT_INJECTION
+#include <linux/fault-inject.h>
+struct fault_opcode { /* state of the drop-by-opcode fault injector */
+       struct fault_attr attr; /* attribute handed to should_fail() */
+       struct dentry *dir; /* "fault_opcode" debugfs directory */
+       bool fault_by_opcode; /* debugfs switch: injector enabled */
+       u64 n_rxfaults[256]; /* injected faults per opcode, rx == true */
+       u64 n_txfaults[256]; /* injected faults per opcode, rx == false */
+       u8 opcode; /* value the masked packet opcode must equal */
+       u8 mask; /* ANDed with the packet opcode before the compare */
+};
+
+struct fault_packet { /* state of the unconditional packet fault injector */
+       struct fault_attr attr; /* attribute handed to should_fail() */
+       struct dentry *dir; /* "fault_packet" debugfs directory */
+       bool fault_by_packet; /* debugfs switch: injector enabled */
+       u64 n_faults; /* injected faults, exposed as "fault_stats" */
+};
+
+bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
+bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
+#else
+static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
+{
+       return false; /* CONFIG_FAULT_INJECTION is off: never drop */
+}
+
+static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
+                                        u32 opcode, bool rx)
+{
+       return false; /* CONFIG_FAULT_INJECTION is off: never drop */
+}
+#endif
+
 #else
 static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
 {
 }
 
-void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
+static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
+{
+}
+
+static inline void hfi1_dbg_init(void)
 {
 }
 
-void hfi1_dbg_init(void)
+static inline void hfi1_dbg_exit(void)
 {
 }
 
-void hfi1_dbg_exit(void)
+static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
 {
+       return false;
 }
 
+static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
+                                        u32 opcode, bool rx)
+{
+       return false;
+}
 #endif
 
 #endif                          /* _HFI1_DEBUGFS_H */
index 3881c951f6af308dd23266ab91c78c000fa4f5d9..c0b012f6e11cbcde6b7c950b801533ceda0d7bf9 100644 (file)
@@ -59,6 +59,7 @@
 #include "trace.h"
 #include "qp.h"
 #include "sdma.h"
+#include "debugfs.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -1354,6 +1355,9 @@ void handle_eflags(struct hfi1_packet *packet)
  */
 int process_receive_ib(struct hfi1_packet *packet)
 {
+       if (unlikely(hfi1_dbg_fault_packet(packet)))
+               return RHF_RCV_CONTINUE;
+
        trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
                          packet->rcd->ctxt,
                          rhf_err_flags(packet->rhf),
@@ -1409,6 +1413,8 @@ int process_receive_error(struct hfi1_packet *packet)
 
 int kdeth_process_expected(struct hfi1_packet *packet)
 {
+       if (unlikely(hfi1_dbg_fault_packet(packet)))
+               return RHF_RCV_CONTINUE;
        if (unlikely(rhf_err_flags(packet->rhf)))
                handle_eflags(packet);
 
@@ -1421,6 +1427,8 @@ int kdeth_process_eager(struct hfi1_packet *packet)
 {
        if (unlikely(rhf_err_flags(packet->rhf)))
                handle_eflags(packet);
+       if (unlikely(hfi1_dbg_fault_packet(packet)))
+               return RHF_RCV_CONTINUE;
 
        dd_dev_err(packet->rcd->dd,
                   "Unhandled eager packet received. Dropping.\n");
index d308454af7fdaeea38d4ba5075ff2385415c8f0d..deac77ddaeab644ce9a74d819f88a40de6220548 100644 (file)
@@ -72,6 +72,54 @@ TRACE_EVENT(hfi1_interrupt,
                      __entry->src)
 );
 
+#ifdef CONFIG_FAULT_INJECTION
+/*
+ * Emitted by hfi1_dbg_fault_opcode() each time a fault is injected for an
+ * opcode; records the device name, the QP number and the opcode.
+ */
+TRACE_EVENT(hfi1_fault_opcode,
+           TP_PROTO(struct rvt_qp *qp, u8 opcode),
+           TP_ARGS(qp, opcode),
+           TP_STRUCT__entry(DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+                            __field(u32, qpn)
+                            __field(u8, opcode)
+                            ),
+           /* terminate DD_DEV_ASSIGN() explicitly, as hfi1_fault_packet does */
+           TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+                          __entry->qpn = qp->ibqp.qp_num;
+                          __entry->opcode = opcode;
+                          ),
+           TP_printk("[%s] qpn 0x%x opcode 0x%x",
+                     __get_str(dev), __entry->qpn, __entry->opcode)
+);
+
+TRACE_EVENT(hfi1_fault_packet, /* emitted by hfi1_dbg_fault_packet() on each injected fault */
+           TP_PROTO(struct hfi1_packet *packet),
+           TP_ARGS(packet),
+           TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->ppd->dd)
+                            __field(u64, eflags) /* rhf_err_flags() of the packet's rhf */
+                            __field(u32, ctxt) /* packet->rcd->ctxt */
+                            __field(u32, hlen) /* packet->hlen */
+                            __field(u32, tlen) /* packet->tlen */
+                            __field(u32, updegr) /* packet->updegr */
+                            __field(u32, etail) /* rhf_egr_index() of the packet's rhf */
+                            ),
+            TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->ppd->dd);
+                           __entry->eflags = rhf_err_flags(packet->rhf);
+                           __entry->ctxt = packet->rcd->ctxt;
+                           __entry->hlen = packet->hlen;
+                           __entry->tlen = packet->tlen;
+                           __entry->updegr = packet->updegr;
+                           __entry->etail = rhf_egr_index(packet->rhf);
+                           ),
+            TP_printk(
+               "[%s] ctxt %d eflags 0x%llx hlen %d tlen %d updegr %d etail %d",
+               __get_str(dev),
+               __entry->ctxt,
+               __entry->eflags,
+               __entry->hlen,
+               __entry->tlen,
+               __entry->updegr,
+               __entry->etail
+               )
+);
+#endif
+
 #endif /* __HFI1_TRACE_MISC_H */
 
 #undef TRACE_INCLUDE_PATH
index 928918cc7d809af82374f090c8c25fe777b2feda..9f016daba2565c86aff6219d0aaf27cc9cf5b7e1 100644 (file)
@@ -60,6 +60,7 @@
 #include "trace.h"
 #include "qp.h"
 #include "verbs_txreq.h"
+#include "debugfs.h"
 
 static unsigned int hfi1_lkey_table_size = 16;
 module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -599,6 +600,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
                        rcu_read_unlock();
                        goto drop;
                }
+               if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode,
+                                                  true))) {
+                       rcu_read_unlock();
+                       goto drop;
+               }
                spin_lock_irqsave(&packet->qp->r_lock, flags);
                packet_handler = qp_ok(opcode, packet);
                if (likely(packet_handler))
index 3a0b589e41c2105bbfe608af1820d48c6cc55782..2756ec35b054b0011c14d55c299a0707c14f6769 100644 (file)
@@ -195,6 +195,10 @@ struct hfi1_ibdev {
        struct dentry *hfi1_ibdev_dbg;
        /* per HFI symlinks to above */
        struct dentry *hfi1_ibdev_link;
+#ifdef CONFIG_FAULT_INJECTION
+       struct fault_opcode *fault_opcode;
+       struct fault_packet *fault_packet;
+#endif
 #endif
 };