net: hns3: Add enable and process common ecc errors
authorShiju Jose <shiju.jose@huawei.com>
Fri, 19 Oct 2018 19:15:29 +0000 (20:15 +0100)
committerDavid S. Miller <davem@davemloft.net>
Tue, 23 Oct 2018 02:31:13 +0000 (19:31 -0700)
This patch adds enabling and processing of ECC errors from the
common HNS blocks: CMDQ (Command Queue),
IMP (Integrated Management Processor) and TQP (Task Queue Pair).

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h

index 1ccde67db77087c4bc2fd1332253aeb73e911197..8525f18f3f5a34b2f90aa5a38fd049c82d69708f 100644 (file)
@@ -209,6 +209,9 @@ enum hclge_opcode_type {
 
        /* Led command */
        HCLGE_OPC_LED_STATUS_CFG        = 0xB000,
+
+       /* Error INT commands */
+       HCLGE_COMMON_ECC_INT_CFG        = 0x1505,
 };
 
 #define HCLGE_TQP_REG_OFFSET           0x80000
index d2640d14522224842d0ce5e3ce3477a09b3db3f8..8b37de49e416537619785b128166950a3bcd485e 100644 (file)
@@ -3,7 +3,292 @@
 
 #include "hclge_err.h"
 
+static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = { /* IMP TCM ECC, query data[0] */
+       { .int_msk = BIT(0), .msg = "imp_itcm0_ecc_1bit_err" },
+       { .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err" },
+       { .int_msk = BIT(2), .msg = "imp_itcm1_ecc_1bit_err" },
+       { .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err" },
+       { .int_msk = BIT(4), .msg = "imp_itcm2_ecc_1bit_err" },
+       { .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err" },
+       { .int_msk = BIT(6), .msg = "imp_itcm3_ecc_1bit_err" },
+       { .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err" },
+       { .int_msk = BIT(8), .msg = "imp_dtcm0_mem0_ecc_1bit_err" },
+       { .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err" },
+       { .int_msk = BIT(10), .msg = "imp_dtcm0_mem1_ecc_1bit_err" },
+       { .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err" },
+       { .int_msk = BIT(12), .msg = "imp_dtcm1_mem0_ecc_1bit_err" },
+       { .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err" },
+       { .int_msk = BIT(14), .msg = "imp_dtcm1_mem1_ecc_1bit_err" },
+       { .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err" },
+       { /* sentinel: NULL .msg ends the hclge_log_error() walk */ }
+};
+
+static const struct hclge_hw_error hclge_imp_itcm4_ecc_int[] = { /* IMP ITCM4 ECC, query data[5] */
+       { .int_msk = BIT(0), .msg = "imp_itcm4_ecc_1bit_err" },
+       { .int_msk = BIT(1), .msg = "imp_itcm4_ecc_mbit_err" },
+       { /* sentinel: NULL .msg ends the hclge_log_error() walk */ }
+};
+
+static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = { /* NIC CMDQ ECC, query data[1] low 16 bits */
+       { .int_msk = BIT(0), .msg = "cmdq_nic_rx_depth_ecc_1bit_err" },
+       { .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err" },
+       { .int_msk = BIT(2), .msg = "cmdq_nic_tx_depth_ecc_1bit_err" },
+       { .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err" },
+       { .int_msk = BIT(4), .msg = "cmdq_nic_rx_tail_ecc_1bit_err" },
+       { .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err" },
+       { .int_msk = BIT(6), .msg = "cmdq_nic_tx_tail_ecc_1bit_err" },
+       { .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err" },
+       { .int_msk = BIT(8), .msg = "cmdq_nic_rx_head_ecc_1bit_err" },
+       { .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err" },
+       { .int_msk = BIT(10), .msg = "cmdq_nic_tx_head_ecc_1bit_err" },
+       { .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err" },
+       { .int_msk = BIT(12), .msg = "cmdq_nic_rx_addr_ecc_1bit_err" },
+       { .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err" },
+       { .int_msk = BIT(14), .msg = "cmdq_nic_tx_addr_ecc_1bit_err" },
+       { .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err" },
+       { /* sentinel: NULL .msg ends the hclge_log_error() walk */ }
+};
+
+static const struct hclge_hw_error hclge_cmdq_rocee_mem_ecc_int[] = { /* RoCEE CMDQ ECC, query data[1] shifted down by 16 */
+       { .int_msk = BIT(0), .msg = "cmdq_rocee_rx_depth_ecc_1bit_err" },
+       { .int_msk = BIT(1), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" },
+       { .int_msk = BIT(2), .msg = "cmdq_rocee_tx_depth_ecc_1bit_err" },
+       { .int_msk = BIT(3), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" },
+       { .int_msk = BIT(4), .msg = "cmdq_rocee_rx_tail_ecc_1bit_err" },
+       { .int_msk = BIT(5), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" },
+       { .int_msk = BIT(6), .msg = "cmdq_rocee_tx_tail_ecc_1bit_err" },
+       { .int_msk = BIT(7), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" },
+       { .int_msk = BIT(8), .msg = "cmdq_rocee_rx_head_ecc_1bit_err" },
+       { .int_msk = BIT(9), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" },
+       { .int_msk = BIT(10), .msg = "cmdq_rocee_tx_head_ecc_1bit_err" },
+       { .int_msk = BIT(11), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" },
+       { .int_msk = BIT(12), .msg = "cmdq_rocee_rx_addr_ecc_1bit_err" },
+       { .int_msk = BIT(13), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" },
+       { .int_msk = BIT(14), .msg = "cmdq_rocee_tx_addr_ecc_1bit_err" },
+       { .int_msk = BIT(15), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" },
+       { /* sentinel: NULL .msg ends the hclge_log_error() walk */ }
+};
+
+static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = { /* TQP ECC, query data[3] shifted down by 16 */
+       { .int_msk = BIT(0), .msg = "tqp_int_cfg_even_ecc_1bit_err" },
+       { .int_msk = BIT(1), .msg = "tqp_int_cfg_odd_ecc_1bit_err" },
+       { .int_msk = BIT(2), .msg = "tqp_int_ctrl_even_ecc_1bit_err" },
+       { .int_msk = BIT(3), .msg = "tqp_int_ctrl_odd_ecc_1bit_err" },
+       { .int_msk = BIT(4), .msg = "tx_que_scan_int_ecc_1bit_err" },
+       { .int_msk = BIT(5), .msg = "rx_que_scan_int_ecc_1bit_err" },
+       { .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err" },
+       { .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err" },
+       { .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err" },
+       { .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err" },
+       { .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err" },
+       { .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err" },
+       { /* sentinel: NULL .msg ends the hclge_log_error() walk */ }
+};
+
+static void hclge_log_error(struct device *dev,
+                           const struct hclge_hw_error *err_list,
+                           u32 err_sts)
+{
+       const struct hclge_hw_error *err;
+       int i = 0;
+
+       while (err_list[i].msg) {
+               err = &err_list[i];
+               if (!(err->int_msk & err_sts)) {
+                       i++;
+                       continue;
+               }
+               dev_warn(dev, "%s [error status=0x%x] found\n",
+                        err->msg, err_sts);
+               i++;
+       }
+}
+
+/* hclge_cmd_query_error: read the error information
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor array filled in and sent; needs two entries when
+ *        @flag is non-zero
+ * @cmd:  command opcode
+ * @flag: flag for extended command structure; when non-zero it is OR-ed
+ *        into desc[0] and a second descriptor is sent
+ * @w_num: index of the desc[0] data word that receives @int_type;
+ *         0 means no interrupt type is written
+ * @int_type: select which type of the interrupt for which the error
+ * info will be read(RAS-CE/RAS-NFE/RAS-FE etc).
+ *
+ * Queries the error info from hw register/s using a command and returns
+ * the result of hclge_cmd_send().
+ */
+static int hclge_cmd_query_error(struct hclge_dev *hdev,
+                                struct hclge_desc *desc, u32 cmd,
+                                u16 flag, u8 w_num,
+                                enum hclge_err_int_type int_type)
+{
+       int desc_cnt = flag ? 2 : 1;
+       int ret;
+
+       hclge_cmd_setup_basic_desc(&desc[0], cmd, true);
+       if (flag) {
+               /* chain a second descriptor behind the first */
+               desc[0].flag |= cpu_to_le16(flag);
+               hclge_cmd_setup_basic_desc(&desc[1], cmd, true);
+       }
+       if (w_num)
+               desc[0].data[w_num] = cpu_to_le32(int_type);
+
+       ret = hclge_cmd_send(&hdev->hw, desc, desc_cnt);
+       if (ret)
+               dev_err(&hdev->pdev->dev,
+                       "query error cmd failed (%d)\n", ret);
+
+       return ret;
+}
+
+/* hclge_cmd_clear_error: clear the error status
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor array that is sent; needs two entries when @flag is
+ *        non-zero
+ * @desc_src: optional prefilled descriptor(s) from the previous command;
+ *            their data words are copied into @desc, but only when @cmd
+ *            is non-zero
+ * @cmd:  command opcode, or 0 to reuse @desc through hclge_cmd_reuse_desc()
+ * @flag: flag for extended command structure; when non-zero it is OR-ed
+ *        into desc[0] and a second descriptor is sent
+ *
+ * Clears the error status in the hw register/s using a command and
+ * returns the result of hclge_cmd_send().
+ */
+static int hclge_cmd_clear_error(struct hclge_dev *hdev,
+                                struct hclge_desc *desc,
+                                struct hclge_desc *desc_src,
+                                u32 cmd, u16 flag)
+{
+       int desc_cnt = flag ? 2 : 1;
+       int ret, word;
+
+       if (!cmd) {
+               /* no opcode given: reuse the descriptor(s) the caller holds */
+               hclge_cmd_reuse_desc(&desc[0], false);
+               if (flag) {
+                       desc[0].flag |= cpu_to_le16(flag);
+                       hclge_cmd_reuse_desc(&desc[1], false);
+               }
+       } else {
+               hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
+               if (flag) {
+                       desc[0].flag |= cpu_to_le16(flag);
+                       hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
+               }
+               /* copy all six data words from the source, if one was given */
+               for (word = 0; desc_src && word < 6; word++) {
+                       desc[0].data[word] = desc_src[0].data[word];
+                       if (flag)
+                               desc[1].data[word] = desc_src[1].data[word];
+               }
+       }
+
+       ret = hclge_cmd_send(&hdev->hw, desc, desc_cnt);
+       if (ret)
+               dev_err(&hdev->pdev->dev,
+                       "clear error cmd failed (%d)\n", ret);
+
+       return ret;
+}
+
+static int hclge_enable_common_error(struct hclge_dev *hdev, bool en)
+{
+       struct device *dev = &hdev->pdev->dev;
+       struct hclge_desc desc[2];
+       int ret;
+
+       hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false);
+       desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+       hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false);
+
+       if (en) {
+               /* enable COMMON error interrupts */
+               desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN);
+               desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN |
+                                       HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN);
+               desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN);
+               desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN);
+               desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN);
+       } else {
+               /* disable COMMON error interrupts */
+               desc[0].data[0] = 0;
+               desc[0].data[2] = 0;
+               desc[0].data[3] = 0;
+               desc[0].data[4] = 0;
+               desc[0].data[5] = 0;
+       }
+       desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK);
+       desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK |
+                               HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK);
+       desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK);
+       desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK);
+       desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK);
+
+       ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
+       if (ret)
+               dev_err(dev,
+                       "failed(%d) to enable/disable COMMON err interrupts\n",
+                       ret);
+
+       return ret;
+}
+
+/* hclge_process_common_error: log and clear COMMON block errors
+ * @hdev: pointer to struct hclge_dev
+ * @type: interrupt type; not referenced by this handler
+ *
+ * Queries HCLGE_COMMON_ECC_INT_CFG, logs every status bit found in the
+ * IMP TCM, CMDQ (NIC and RoCEE), IMP read-poison, TQP and IMP ITCM4
+ * fields, then reuses the same descriptors to clear the status.
+ */
+static void hclge_process_common_error(struct hclge_dev *hdev,
+                                      enum hclge_err_int_type type)
+{
+       struct device *dev = &hdev->pdev->dev;
+       u32 imp_tcm, cmdq, tqp_imp, itcm4;
+       struct hclge_desc desc[2];
+       int ret;
+
+       /* read err sts */
+       ret = hclge_cmd_query_error(hdev, desc, HCLGE_COMMON_ECC_INT_CFG,
+                                   HCLGE_CMD_FLAG_NEXT, 0, 0);
+       if (ret) {
+               dev_err(dev,
+                       "failed(=%d) to query COMMON error interrupt status\n",
+                       ret);
+               return;
+       }
+
+       /* pull the status words out of the first descriptor */
+       imp_tcm = le32_to_cpu(desc[0].data[0]);
+       cmdq = le32_to_cpu(desc[0].data[1]);
+       tqp_imp = le32_to_cpu(desc[0].data[3]);
+       itcm4 = le32_to_cpu(desc[0].data[5]);
+
+       /* log err */
+       hclge_log_error(dev, hclge_imp_tcm_ecc_int,
+                       imp_tcm & HCLGE_IMP_TCM_ECC_INT_MASK);
+
+       /* data[1] holds NIC bits in the low half, RoCEE bits above them */
+       hclge_log_error(dev, hclge_cmdq_nic_mem_ecc_int,
+                       cmdq & HCLGE_CMDQ_ECC_INT_MASK);
+       hclge_log_error(dev, hclge_cmdq_rocee_mem_ecc_int,
+                       (cmdq >> HCLGE_CMDQ_ROC_ECC_INT_SHIFT) &
+                       HCLGE_CMDQ_ECC_INT_MASK);
+
+       /* data[3] holds the poison flag in bit 0 and the TQP field above */
+       if (tqp_imp & BIT(0))
+               dev_warn(dev, "imp_rd_data_poison_err found\n");
+       hclge_log_error(dev, hclge_tqp_int_ecc_int,
+                       (tqp_imp >> HCLGE_TQP_ECC_INT_SHIFT) &
+                       HCLGE_TQP_ECC_INT_MASK);
+
+       hclge_log_error(dev, hclge_imp_itcm4_ecc_int,
+                       itcm4 & HCLGE_IMP_ITCM4_ECC_INT_MASK);
+
+       /* clear error interrupts */
+       desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_CLR_MASK);
+       desc[1].data[1] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_CLR_MASK |
+                                     HCLGE_CMDQ_ROCEE_ECC_CLR_MASK);
+       desc[1].data[3] = cpu_to_le32(HCLGE_TQP_IMP_ERR_CLR_MASK);
+       desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_CLR_MASK);
+
+       /* opcode 0: the descriptors filled above are reused as they are */
+       ret = hclge_cmd_clear_error(hdev, desc, NULL, 0, HCLGE_CMD_FLAG_NEXT);
+       if (ret)
+               dev_err(dev,
+                       "failed(%d) to clear COMMON error interrupt status\n",
+                       ret);
+}
+
 static const struct hclge_hw_blk hw_blk[] = {
+       { .msk = BIT(5), .name = "COMMON", /* NOTE(review): meaning of BIT(5) is not visible here - confirm */
+         .enable_error = hclge_enable_common_error,
+         .process_error = hclge_process_common_error, },
        { /* sentinel: every field left zero */ }
 };
 
index 373e9bf2756695319a396600f3f9c3881226f23e..b413141085a54d30403911d0ce66d17aa9c10fc0 100644 (file)
 #define HCLGE_RAS_REG_NFE_MASK   0xFF00
 #define HCLGE_RAS_REG_NFE_SHIFT        8
 
+#define HCLGE_IMP_TCM_ECC_ERR_INT_EN   0xFFFF0000 /* *_INT_EN: values hclge_enable_common_error() puts in desc[0] */
+#define HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK      0xFFFF0000 /* *_INT_EN_MASK: values it puts in desc[1] */
+#define HCLGE_IMP_ITCM4_ECC_ERR_INT_EN 0x300
+#define HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK    0x300
+#define HCLGE_CMDQ_NIC_ECC_ERR_INT_EN  0xFFFF
+#define HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK     0xFFFF
+#define HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN        0xFFFF0000
+#define HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK   0xFFFF0000
+#define HCLGE_IMP_RD_POISON_ERR_INT_EN 0x0100
+#define HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK    0x0100
+#define HCLGE_TQP_ECC_ERR_INT_EN       0x0FFF
+#define HCLGE_TQP_ECC_ERR_INT_EN_MASK  0x0FFF
+
+#define HCLGE_IMP_TCM_ECC_INT_MASK     0xFFFF /* *_INT_MASK / *_INT_SHIFT: extract status fields in hclge_process_common_error() */
+#define HCLGE_IMP_ITCM4_ECC_INT_MASK   0x3
+#define HCLGE_CMDQ_ECC_INT_MASK                0xFFFF /* applied to both the NIC and the RoCEE half of data[1] */
+#define HCLGE_CMDQ_ROC_ECC_INT_SHIFT   16
+#define HCLGE_TQP_ECC_INT_MASK         0xFFF
+#define HCLGE_TQP_ECC_INT_SHIFT                16
+#define HCLGE_IMP_TCM_ECC_CLR_MASK     0xFFFF /* *_CLR_MASK: values written to desc[1] before the clear command */
+#define HCLGE_IMP_ITCM4_ECC_CLR_MASK   0x3
+#define HCLGE_CMDQ_NIC_ECC_CLR_MASK    0xFFFF
+#define HCLGE_CMDQ_ROCEE_ECC_CLR_MASK  0xFFFF0000
+#define HCLGE_TQP_IMP_ERR_CLR_MASK     0x0FFF0001 /* bits 16-27 (TQP field) plus bit 0 (IMP read poison) of data[3] */
+
 enum hclge_err_int_type {
        HCLGE_ERR_INT_MSIX = 0,
        HCLGE_ERR_INT_RAS_CE = 1,
@@ -26,6 +51,11 @@ struct hclge_hw_blk {
                              enum hclge_err_int_type type);
 };
 
+struct hclge_hw_error { /* one row of an error table walked by hclge_log_error() */
+       u32 int_msk; /* bit(s) tested against the status word */
+       const char *msg; /* name logged when the bit is set; NULL marks the end of a table */
+};
+
 int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state);
 pci_ers_result_t hclge_process_ras_hw_error(struct hnae3_ae_dev *ae_dev);
 #endif