net: qed: adding hw_err states and handling
authorIgor Russkikh <irusskikh@marvell.com>
Thu, 14 May 2020 09:57:17 +0000 (12:57 +0300)
committerDavid S. Miller <davem@davemloft.net>
Thu, 14 May 2020 20:25:46 +0000 (13:25 -0700)
Here we introduce qed device error tracking flags and error types.

qed_hw_err_notify is an entry point for reporting errors.
It'll notify higher level drivers (qede/qedr/etc) to handle and recover
the error.

The list of possible errors comes from hardware interfaces, but could be
extended in the future.

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/qlogic/qed/qed.h
drivers/net/ethernet/qlogic/qed/qed_hw.c
drivers/net/ethernet/qlogic/qed/qed_hw.h
drivers/net/ethernet/qlogic/qed/qed_main.c
include/linux/qed/qed_if.h

index fa41bf08a58951c4ff216d7ba612f1a74d65d44e..12c40ce3d876785845f0d775f202497f23ea04af 100644 (file)
@@ -1020,6 +1020,8 @@ u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
                   u32 input_len, u8 *input_buf,
                   u32 max_size, u8 *unzip_buf);
 void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
+void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
+                          enum qed_hw_err_type err_type);
 void qed_get_protocol_stats(struct qed_dev *cdev,
                            enum qed_mcp_protocol_type type,
                            union qed_mcp_protocol_stats *stats);
index 4ab8cfaf63d16dc03178f1b09c41c0eba855bb68..90b777019cf53dd98eed7ab1a8fea5532fc82157 100644 (file)
@@ -837,6 +837,38 @@ int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
+void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
+                      struct qed_ptt *p_ptt,
+                      enum qed_hw_err_type err_type, char *fmt, ...)
+{
+       char buf[QED_HW_ERR_MAX_STR_SIZE];
+       va_list vl;
+
+       /* Log the optional caller-supplied message. vsnprintf() never writes
+        * more than sizeof(buf) bytes, so an over-long message is truncated
+        * instead of overflowing the on-stack buffer.
+        */
+       if (fmt) {
+               va_start(vl, fmt);
+               vsnprintf(buf, sizeof(buf), fmt, vl);
+               va_end(vl);
+
+               DP_NOTICE(p_hwfn, "%s", buf);
+       }
+
+       /* Fan failure cannot be masked by handling of another HW error */
+       if (p_hwfn->cdev->recov_in_prog &&
+           err_type != QED_HW_ERR_FAN_FAIL) {
+               DP_VERBOSE(p_hwfn,
+                          NETIF_MSG_DRV,
+                          "Recovery is in progress. Avoid notifying about HW error %d.\n",
+                          err_type);
+               return;
+       }
+
+       /* p_ptt is not used here; pass the error on for reporting */
+       qed_hw_error_occurred(p_hwfn, err_type);
+}
+
 int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
                    struct qed_ptt *p_ptt, const char *phase)
 {
index 505e94db939dbc4c51aba338c8142463bd2043ca..f5b109b04b66f724b64069ab806bb596d3d29e69 100644 (file)
@@ -315,4 +315,19 @@ int qed_init_fw_data(struct qed_dev *cdev,
 int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
                    struct qed_ptt *p_ptt, const char *phase);
 
+#define QED_HW_ERR_MAX_STR_SIZE 256
+
+/**
+ * @brief qed_hw_err_notify - Log a HW error and notify the upper layer
+ *     driver, unless a recovery is in progress (fan failure always notifies).
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param err_type
+ * @param fmt - printf-style format of a message to log; may be NULL
+ * @param ... - buffer format args
+ */
+void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
+                      struct qed_ptt *p_ptt,
+                      enum qed_hw_err_type err_type, char *fmt, ...);
 #endif
index 38a1d26ca9db38792082ca0855c336bc8102e793..d7c9d94e4c59c8f95a2ddee30a3c727d2780d69a 100644 (file)
@@ -2468,6 +2468,35 @@ void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
                ops->schedule_recovery_handler(cookie);
 }
 
+static char *qed_hw_err_type_descr[] = { /* indexed by qed_hw_err_type */
+       [QED_HW_ERR_FAN_FAIL]           = "Fan Failure",
+       [QED_HW_ERR_MFW_RESP_FAIL]      = "MFW Response Failure",
+       [QED_HW_ERR_HW_ATTN]            = "HW Attention",
+       [QED_HW_ERR_DMAE_FAIL]          = "DMAE Failure",
+       [QED_HW_ERR_RAMROD_FAIL]        = "Ramrod Failure",
+       [QED_HW_ERR_FW_ASSERT]          = "FW Assertion",
+       [QED_HW_ERR_LAST]               = "Unknown", /* out-of-range types */
+};
+
+void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
+                          enum qed_hw_err_type err_type)
+{
+       struct qed_common_cb_ops *cb_ops;
+
+       /* Types past QED_HW_ERR_LAST are clamped to it ("Unknown" entry) */
+       if (err_type > QED_HW_ERR_LAST)
+               err_type = QED_HW_ERR_LAST;
+
+       DP_NOTICE(p_hwfn, "HW error occurred [%s]\n",
+                 qed_hw_err_type_descr[err_type]);
+
+       /* Forward the error to the protocol driver, if it set a handler */
+       cb_ops = p_hwfn->cdev->protocol_ops.common;
+       if (cb_ops && cb_ops->schedule_hw_err_handler)
+               cb_ops->schedule_hw_err_handler(p_hwfn->cdev->ops_cookie,
+                                               err_type);
+}
+
 static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
                            void *handle)
 {
index 8f29e0d8a7b382083dddcf11c38f7dcf3be0da4d..1b7d9548ee433cfce7c2051e26aa632ece7141e9 100644 (file)
@@ -607,6 +607,16 @@ struct qed_sb_info {
        struct qed_dev *cdev;
 };
 
+enum qed_hw_err_type {
+       QED_HW_ERR_FAN_FAIL,            /* fan failure */
+       QED_HW_ERR_MFW_RESP_FAIL,       /* MFW response failure */
+       QED_HW_ERR_HW_ATTN,             /* HW attention */
+       QED_HW_ERR_DMAE_FAIL,           /* DMAE failure */
+       QED_HW_ERR_RAMROD_FAIL,         /* ramrod failure */
+       QED_HW_ERR_FW_ASSERT,           /* FW assertion */
+       QED_HW_ERR_LAST,                /* end marker; described as "Unknown" */
+};
+
 enum qed_dev_type {
        QED_DEV_TYPE_BB,
        QED_DEV_TYPE_AH,
@@ -814,6 +824,8 @@ struct qed_common_cb_ops {
        void    (*link_update)(void                     *dev,
                               struct qed_link_output   *link);
        void (*schedule_recovery_handler)(void *dev);
+       void (*schedule_hw_err_handler)(void *dev,
+                                       enum qed_hw_err_type err_type);
        void    (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
        void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
        void (*get_protocol_tlv_data)(void *dev, void *data);