octeontx2-pf: Error handling support
authorGeetha sowjanya <gakula@marvell.com>
Mon, 27 Jan 2020 13:05:24 +0000 (18:35 +0530)
committerDavid S. Miller <davem@davemloft.net>
Mon, 27 Jan 2020 13:33:39 +0000 (14:33 +0100)
HW reports many errors on the receive and transmit paths, such as
incorrect queue configuration, packet transmission errors, LMTST
instruction errors, transmit queue full, etc. These are reported via
the QINT interrupt. Most of the errors are fatal and need
reinitialization.

Also add support for allocating receive buffers in non-atomic context
when allocation fails in NAPI context.

Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Aleksey Makarov <amakarov@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h

index 836b3c860e62be7a84025495724b2231c4f2d7b1..2b503066d801538138585c07e4a10e71cd4b199d 100644 (file)
@@ -154,6 +154,13 @@ ret:
        return iova;
 }
 
+/* ndo_tx_timeout callback: hand recovery over to the reset work item.
+ * The index of the timed-out queue (@txq) is not used.
+ */
+void otx2_tx_timeout(struct net_device *netdev, unsigned int txq)
+{
+       struct otx2_nic *nic = netdev_priv(netdev);
+       struct work_struct *reset = &nic->reset_task;
+
+       schedule_work(reset);
+}
+
 void otx2_get_mac_from_af(struct net_device *netdev)
 {
        struct otx2_nic *pfvf = netdev_priv(netdev);
@@ -362,6 +369,7 @@ static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura)
        aq->rq.lpb_sizem1 = (DMA_BUFFER_LEN(pfvf->rbsize) / 8) - 1;
        aq->rq.xqe_imm_size = 0; /* Copying of packet to CQE not needed */
        aq->rq.flow_tagw = 32; /* Copy full 32bit flow_tag to CQE header */
+       aq->rq.qint_idx = 0;
        aq->rq.lpb_drop_ena = 1; /* Enable RED dropping for AURA */
        aq->rq.xqe_drop_ena = 1; /* Enable RED dropping for CQ/SSO */
        aq->rq.xqe_pass = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt);
@@ -424,6 +432,8 @@ static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
        aq->sq.default_chan = pfvf->hw.tx_chan_base;
        aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
        aq->sq.sqb_aura = sqb_aura;
+       aq->sq.sq_int_ena = NIX_SQINT_BITS;
+       aq->sq.qint_idx = 0;
        /* Due pipelining impact minimum 2000 unused SQ CQE's
         * need to maintain to avoid CQ overflow.
         */
@@ -470,6 +480,7 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
        pool_id = ((cq->cq_type == CQ_RX) &&
                   (pfvf->hw.rqpool_cnt != pfvf->hw.rx_queues)) ? 0 : qidx;
        cq->rbpool = &qset->pool[pool_id];
+       cq->refill_task_sched = false;
 
        /* Get memory to put this msg */
        aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox);
@@ -481,6 +492,8 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
        aq->cq.caching = 1;
        aq->cq.base = cq->cqe->iova;
        aq->cq.cint_idx = cq->cint_idx;
+       aq->cq.cq_err_int_ena = NIX_CQERRINT_BITS;
+       aq->cq.qint_idx = 0;
        aq->cq.avg_level = 255;
 
        if (qidx < pfvf->hw.rx_queues) {
@@ -496,6 +509,45 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
        return otx2_sync_mbox_msg(&pfvf->mbox);
 }
 
+/* Delayed-work handler that tops up the receive buffer pool of one CQ using
+ * GFP_KERNEL allocations.
+ *
+ * The queue index is derived from the position of this refill_work entry in
+ * pfvf->refill_wrk[]. Buffers are allocated and handed to the aura until
+ * cq->pool_ptrs reaches zero or an allocation fails.
+ */
+static void otx2_pool_refill_task(struct work_struct *work)
+{
+       struct refill_work *wrk = container_of(work, struct refill_work,
+                                              pool_refill_work.work);
+       struct otx2_nic *pfvf = wrk->pf;
+       int qidx = wrk - pfvf->refill_wrk;
+       struct otx2_cq_queue *cq = &pfvf->qset.cq[qidx];
+       struct otx2_pool *rbpool = cq->rbpool;
+       int wanted = cq->pool_ptrs;
+       int left;
+       s64 bufptr;
+
+       for (; cq->pool_ptrs; cq->pool_ptrs--) {
+               bufptr = otx2_alloc_rbuf(pfvf, rbpool, GFP_KERNEL);
+               if (bufptr <= 0)
+                       break;
+               otx2_aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM);
+       }
+
+       /* An allocation failed: if no more than half of the requested
+        * pointers were freed to the aura, retry from the workqueue later
+        * and leave refill_task_sched set. Otherwise fall through and clear
+        * the flag so that NAPI processes this RQ again.
+        */
+       left = cq->pool_ptrs;
+       if (left && (wanted - left) <= wanted / 2) {
+               schedule_delayed_work(&wrk->pool_refill_work,
+                                     msecs_to_jiffies(100));
+               return;
+       }
+
+       cq->refill_task_sched = false;
+}
+
 int otx2_config_nix_queues(struct otx2_nic *pfvf)
 {
        int qidx, err;
@@ -525,6 +577,17 @@ int otx2_config_nix_queues(struct otx2_nic *pfvf)
                        return err;
        }
 
+       /* Initialize work queue for receive buffer refill */
+       pfvf->refill_wrk = devm_kcalloc(pfvf->dev, pfvf->qset.cq_cnt,
+                                       sizeof(struct refill_work), GFP_KERNEL);
+       if (!pfvf->refill_wrk)
+               return -ENOMEM;
+
+       for (qidx = 0; qidx < pfvf->qset.cq_cnt; qidx++) {
+               pfvf->refill_wrk[qidx].pf = pfvf;
+               INIT_DELAYED_WORK(&pfvf->refill_wrk[qidx].pool_refill_work,
+                                 otx2_pool_refill_task);
+       }
        return 0;
 }
 
index 8c29b63b319e5a7be6061f6acfad10ecb5eab428..2a812110995fb4daa319cc59b188f02f48855290 100644 (file)
@@ -129,6 +129,11 @@ struct otx2_hw {
        struct otx2_drv_stats   drv_stats;
 };
 
+struct refill_work { /* Deferred receive-buffer refill context, one per CQ */
+       struct delayed_work pool_refill_work; /* runs otx2_pool_refill_task() */
+       struct otx2_nic *pf; /* NIC this entry belongs to */
+};
+
 struct otx2_nic {
        void __iomem            *reg_base;
        struct net_device       *netdev;
@@ -151,6 +156,10 @@ struct otx2_nic {
        u16                     pcifunc; /* RVU PF_FUNC */
        struct cgx_link_user_info linfo;
 
+       u64                     reset_count;
+       struct work_struct      reset_task;
+       struct refill_work      *refill_wrk;
+
        /* Block address of NIX either BLKADDR_NIX0 or BLKADDR_NIX1 */
        int                     nix_blkaddr;
 };
@@ -435,6 +444,9 @@ otx2_mbox_up_handler_ ## _fn_name(struct otx2_nic *pfvf,            \
 MBOX_UP_CGX_MESSAGES
 #undef M
 
+/* Time to wait before watchdog kicks off */
+#define OTX2_TX_TIMEOUT                (100 * HZ)
+
 #define        RVU_PFVF_PF_SHIFT       10
 #define        RVU_PFVF_PF_MASK        0x3F
 #define        RVU_PFVF_FUNC_SHIFT     0
@@ -472,6 +484,7 @@ void otx2_free_cints(struct otx2_nic *pfvf, int n);
 void otx2_set_cints_affinity(struct otx2_nic *pfvf);
 int otx2_set_mac_address(struct net_device *netdev, void *p);
 int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu);
+void otx2_tx_timeout(struct net_device *netdev, unsigned int txq);
 void otx2_get_mac_from_af(struct net_device *netdev);
 void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx);
 
index 60a67b90f2b043d65b4eef6e58883144cbbaf4db..171bab0ed662ee3732e9148e99a34434d71299ea 100644 (file)
@@ -478,6 +478,85 @@ static int otx2_set_real_num_queues(struct net_device *netdev,
        return err;
 }
 
+static irqreturn_t otx2_q_intr_handler(int irq, void *data)
+{
+       struct otx2_nic *pf = data;
+       u64 val, *ptr;
+       u64 qidx = 0;
+
+       /* CQ */
+       for (qidx = 0; qidx < pf->qset.cq_cnt; qidx++) {
+               ptr = otx2_get_regaddr(pf, NIX_LF_CQ_OP_INT);
+               val = otx2_atomic64_add((qidx << 44), ptr);
+
+               otx2_write64(pf, NIX_LF_CQ_OP_INT, (qidx << 44) |
+                            (val & NIX_CQERRINT_BITS));
+               if (!(val & (NIX_CQERRINT_BITS | BIT_ULL(42))))
+                       continue;
+
+               if (val & BIT_ULL(42)) {
+                       netdev_err(pf->netdev, "CQ%lld: error reading NIX_LF_CQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n",
+                                  qidx, otx2_read64(pf, NIX_LF_ERR_INT));
+               } else {
+                       if (val & BIT_ULL(NIX_CQERRINT_DOOR_ERR))
+                               netdev_err(pf->netdev, "CQ%lld: Doorbell error",
+                                          qidx);
+                       if (val & BIT_ULL(NIX_CQERRINT_CQE_FAULT))
+                               netdev_err(pf->netdev, "CQ%lld: Memory fault on CQE write to LLC/DRAM",
+                                          qidx);
+               }
+
+               schedule_work(&pf->reset_task);
+       }
+
+       /* SQ */
+       for (qidx = 0; qidx < pf->hw.tx_queues; qidx++) {
+               ptr = otx2_get_regaddr(pf, NIX_LF_SQ_OP_INT);
+               val = otx2_atomic64_add((qidx << 44), ptr);
+               otx2_write64(pf, NIX_LF_SQ_OP_INT, (qidx << 44) |
+                            (val & NIX_SQINT_BITS));
+
+               if (!(val & (NIX_SQINT_BITS | BIT_ULL(42))))
+                       continue;
+
+               if (val & BIT_ULL(42)) {
+                       netdev_err(pf->netdev, "SQ%lld: error reading NIX_LF_SQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n",
+                                  qidx, otx2_read64(pf, NIX_LF_ERR_INT));
+               } else {
+                       if (val & BIT_ULL(NIX_SQINT_LMT_ERR)) {
+                               netdev_err(pf->netdev, "SQ%lld: LMT store error NIX_LF_SQ_OP_ERR_DBG:0x%llx",
+                                          qidx,
+                                          otx2_read64(pf,
+                                                      NIX_LF_SQ_OP_ERR_DBG));
+                               otx2_write64(pf, NIX_LF_SQ_OP_ERR_DBG,
+                                            BIT_ULL(44));
+                       }
+                       if (val & BIT_ULL(NIX_SQINT_MNQ_ERR)) {
+                               netdev_err(pf->netdev, "SQ%lld: Meta-descriptor enqueue error NIX_LF_MNQ_ERR_DGB:0x%llx\n",
+                                          qidx,
+                                          otx2_read64(pf, NIX_LF_MNQ_ERR_DBG));
+                               otx2_write64(pf, NIX_LF_MNQ_ERR_DBG,
+                                            BIT_ULL(44));
+                       }
+                       if (val & BIT_ULL(NIX_SQINT_SEND_ERR)) {
+                               netdev_err(pf->netdev, "SQ%lld: Send error, NIX_LF_SEND_ERR_DBG 0x%llx",
+                                          qidx,
+                                          otx2_read64(pf,
+                                                      NIX_LF_SEND_ERR_DBG));
+                               otx2_write64(pf, NIX_LF_SEND_ERR_DBG,
+                                            BIT_ULL(44));
+                       }
+                       if (val & BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL))
+                               netdev_err(pf->netdev, "SQ%lld: SQB allocation failed",
+                                          qidx);
+               }
+
+               schedule_work(&pf->reset_task);
+       }
+
+       return IRQ_HANDLED;
+}
+
 static irqreturn_t otx2_cq_intr_handler(int irq, void *cq_irq)
 {
        struct otx2_cq_poll *cq_poll = (struct otx2_cq_poll *)cq_irq;
@@ -759,6 +838,24 @@ int otx2_open(struct net_device *netdev)
        if (err)
                goto err_disable_napi;
 
+       /* Register Queue IRQ handlers */
+       vec = pf->hw.nix_msixoff + NIX_LF_QINT_VEC_START;
+       irq_name = &pf->hw.irq_name[vec * NAME_SIZE];
+
+       snprintf(irq_name, NAME_SIZE, "%s-qerr", pf->netdev->name);
+
+       err = request_irq(pci_irq_vector(pf->pdev, vec),
+                         otx2_q_intr_handler, 0, irq_name, pf);
+       if (err) {
+               dev_err(pf->dev,
+                       "RVUPF%d: IRQ registration failed for QERR\n",
+                       rvu_get_pf(pf->pcifunc));
+               goto err_disable_napi;
+       }
+
+       /* Enable QINT IRQ */
+       otx2_write64(pf, NIX_LF_QINTX_ENA_W1S(0), BIT_ULL(0));
+
        /* Register CQ IRQ handlers */
        vec = pf->hw.nix_msixoff + NIX_LF_CINT_VEC_START;
        for (qidx = 0; qidx < pf->hw.cint_cnt; qidx++) {
@@ -803,6 +900,11 @@ int otx2_open(struct net_device *netdev)
 
 err_free_cints:
        otx2_free_cints(pf, qidx);
+       vec = pci_irq_vector(pf->pdev,
+                            pf->hw.nix_msixoff + NIX_LF_QINT_VEC_START);
+       otx2_write64(pf, NIX_LF_QINTX_ENA_W1C(0), BIT_ULL(0));
+       synchronize_irq(vec);
+       free_irq(vec, pf);
 err_disable_napi:
        otx2_disable_napi(pf);
        otx2_free_hw_resources(pf);
@@ -818,7 +920,7 @@ int otx2_stop(struct net_device *netdev)
        struct otx2_nic *pf = netdev_priv(netdev);
        struct otx2_cq_poll *cq_poll = NULL;
        struct otx2_qset *qset = &pf->qset;
-       int qidx, vec;
+       int qidx, vec, wrk;
 
        netif_carrier_off(netdev);
        netif_tx_stop_all_queues(netdev);
@@ -830,6 +932,13 @@ int otx2_stop(struct net_device *netdev)
        /* First stop packet Rx/Tx */
        otx2_rxtx_enable(pf, false);
 
+       /* Cleanup Queue IRQ */
+       vec = pci_irq_vector(pf->pdev,
+                            pf->hw.nix_msixoff + NIX_LF_QINT_VEC_START);
+       otx2_write64(pf, NIX_LF_QINTX_ENA_W1C(0), BIT_ULL(0));
+       synchronize_irq(vec);
+       free_irq(vec, pf);
+
        /* Cleanup CQ NAPI and IRQ */
        vec = pf->hw.nix_msixoff + NIX_LF_CINT_VEC_START;
        for (qidx = 0; qidx < pf->hw.cint_cnt; qidx++) {
@@ -852,6 +961,10 @@ int otx2_stop(struct net_device *netdev)
        for (qidx = 0; qidx < netdev->num_tx_queues; qidx++)
                netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx));
 
+       for (wrk = 0; wrk < pf->qset.cq_cnt; wrk++)
+               cancel_delayed_work_sync(&pf->refill_wrk[wrk].pool_refill_work);
+       devm_kfree(pf->dev, pf->refill_wrk);
+
        kfree(qset->sq);
        kfree(qset->cq);
        kfree(qset->napi);
@@ -931,6 +1044,19 @@ static int otx2_set_features(struct net_device *netdev,
        return 0;
 }
 
+/* Work handler that recovers the interface by stopping and reopening it.
+ *
+ * Scheduled from the TX timeout callback and from the queue error interrupt
+ * handler. otx2_stop()/otx2_open() are the ndo_stop/ndo_open callbacks, which
+ * the networking core invokes with RTNL held, so RTNL is taken here as well
+ * to keep the reset from racing with an administrative up/down. The
+ * netif_running() check is done under the lock for the same reason.
+ */
+static void otx2_reset_task(struct work_struct *work)
+{
+       struct otx2_nic *pf = container_of(work, struct otx2_nic, reset_task);
+       int err;
+
+       rtnl_lock();
+       if (!netif_running(pf->netdev))
+               goto unlock;
+
+       otx2_stop(pf->netdev);
+       pf->reset_count++;
+       err = otx2_open(pf->netdev);
+       if (err)
+               netdev_err(pf->netdev,
+                          "Failed to reopen interface after reset: %d\n",
+                          err);
+       netif_trans_update(pf->netdev);
+unlock:
+       rtnl_unlock();
+}
+
 static const struct net_device_ops otx2_netdev_ops = {
        .ndo_open               = otx2_open,
        .ndo_stop               = otx2_stop,
@@ -939,6 +1065,7 @@ static const struct net_device_ops otx2_netdev_ops = {
        .ndo_change_mtu         = otx2_change_mtu,
        .ndo_set_rx_mode        = otx2_set_rx_mode,
        .ndo_set_features       = otx2_set_features,
+       .ndo_tx_timeout         = otx2_tx_timeout,
 };
 
 static int otx2_check_pf_usable(struct otx2_nic *nic)
@@ -1115,12 +1242,16 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        netdev->hw_features |= NETIF_F_LOOPBACK | NETIF_F_RXALL;
 
+       netdev->watchdog_timeo = OTX2_TX_TIMEOUT;
+
        netdev->netdev_ops = &otx2_netdev_ops;
 
        /* MTU range: 64 - 9190 */
        netdev->min_mtu = OTX2_MIN_MTU;
        netdev->max_mtu = OTX2_MAX_MTU;
 
+       INIT_WORK(&pf->reset_task, otx2_reset_task);
+
        err = register_netdev(netdev);
        if (err) {
                dev_err(dev, "Failed to register netdevice\n");
index 04a9f12be2e9e898379fa1f4aca3b8cecf9436c2..cba59ddf71bb660363c26edcb500ab558f3b410c 100644 (file)
@@ -245,4 +245,32 @@ struct nix_sqe_mem_s {
        u64 addr; /* W1 */
 };
 
+enum nix_cqerrint_e { /* Bit positions of the CQ error interrupts */
+       NIX_CQERRINT_DOOR_ERR = 0, /* reported as "Doorbell error" */
+       NIX_CQERRINT_WR_FULL = 1, /* not included in NIX_CQERRINT_BITS */
+       NIX_CQERRINT_CQE_FAULT = 2, /* memory fault on CQE write to LLC/DRAM */
+};
+
+#define NIX_CQERRINT_BITS (BIT_ULL(NIX_CQERRINT_DOOR_ERR) | \
+                          BIT_ULL(NIX_CQERRINT_CQE_FAULT)) /* CQ error mask */
+
+enum nix_rqint_e { /* Bit positions of the RQ interrupts */
+       NIX_RQINT_DROP = 0, /* presumably a packet-drop event - TODO confirm */
+       NIX_RQINT_RED = 1, /* presumably a RED drop event - TODO confirm */
+};
+
+#define NIX_RQINT_BITS (BIT_ULL(NIX_RQINT_DROP) | BIT_ULL(NIX_RQINT_RED))
+
+enum nix_sqint_e { /* Bit positions of the SQ error interrupts */
+       NIX_SQINT_LMT_ERR = 0, /* LMT store error */
+       NIX_SQINT_MNQ_ERR = 1, /* meta-descriptor enqueue error */
+       NIX_SQINT_SEND_ERR = 2, /* send error */
+       NIX_SQINT_SQB_ALLOC_FAIL = 3, /* SQB allocation failed */
+};
+
+#define NIX_SQINT_BITS (BIT_ULL(NIX_SQINT_LMT_ERR) | \
+                       BIT_ULL(NIX_SQINT_MNQ_ERR) | \
+                       BIT_ULL(NIX_SQINT_SEND_ERR) | \
+                       BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL)) /* SQ error mask */
+
 #endif /* OTX2_STRUCT_H */
index 87b579a121ab907208638a7d2f231801dad5a4b1..94dac84de8dc3e6de57c8cda0b4fd796b44b32a1 100644 (file)
@@ -237,12 +237,23 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf,
        /* Refill pool with new buffers */
        while (cq->pool_ptrs) {
                bufptr = otx2_alloc_rbuf(pfvf, cq->rbpool, GFP_ATOMIC);
-               if (unlikely(bufptr <= 0))
+               if (unlikely(bufptr <= 0)) {
+                       struct refill_work *work;
+                       struct delayed_work *dwork;
+
+                       work = &pfvf->refill_wrk[cq->cq_idx];
+                       dwork = &work->pool_refill_work;
+                       /* Schedule a task if no other task is running */
+                       if (!cq->refill_task_sched) {
+                               cq->refill_task_sched = true;
+                               schedule_delayed_work(dwork,
+                                                     msecs_to_jiffies(100));
+                       }
                        break;
+               }
                otx2_aura_freeptr(pfvf, cq->cq_idx, bufptr + OTX2_HEAD_ROOM);
                cq->pool_ptrs--;
        }
-       otx2_get_page(cq->rbpool);
 
        return processed_cqe;
 }
@@ -304,6 +315,11 @@ int otx2_napi_handler(struct napi_struct *napi, int budget)
                        continue;
                cq = &qset->cq[cq_idx];
                if (cq->cq_type == CQ_RX) {
+                       /* If the RQ refill WQ task is running, skip napi
+                        * scheduler for this queue.
+                        */
+                       if (cq->refill_task_sched)
+                               continue;
                        workdone += otx2_rx_napi_handler(pfvf, napi,
                                                         cq, budget);
                } else {
index bad225979aec2c8ba1913e0417fe86e6774839a1..a889b491ea52db1472ebc6765233ae737a85ea9d 100644 (file)
@@ -112,6 +112,7 @@ struct otx2_cq_queue {
        u8                      cq_idx;
        u8                      cq_type;
        u8                      cint_idx; /* CQ interrupt id */
+       u8                      refill_task_sched;
        u16                     cqe_size;
        u16                     pool_ptrs;
        u32                     cqe_cnt;