liquidio: prevent rx queues from getting stalled
author Raghu Vatsavayi <raghu.vatsavayi@cavium.com>
Thu, 29 Mar 2018 18:13:22 +0000 (11:13 -0700)
committer David S. Miller <davem@davemloft.net>
Fri, 30 Mar 2018 18:16:19 +0000 (14:16 -0400)
This commit fixes RX traffic issues seen when stress testing the driver
with continuous ifconfig up/down under very high traffic conditions.

The reason for the issue is that, in the existing liquidio_stop function,
NAPI is disabled even before the actual FW/HW interface is brought down via
send_rx_ctrl_cmd(lio, 0). In the window between NAPI being disabled and the
interface actually going down in firmware, firmware continuously enqueues
rx traffic to the host. When an interrupt arrives for new packets, the host
irq handler fails to schedule NAPI because NAPI is already disabled.

After "ifconfig <iface> up", the host re-enables NAPI but cannot schedule
it until it receives another Rx interrupt. The host never receives that Rx
interrupt because it never cleared the Rx interrupt it received during the
interface down operation. The NIC Rx interrupt gets cleared only when the
host processes the queue and clears the queue counts. This anomaly leads to
other issues such as packet overflow in the FW/HW queues and backpressure.

Fix:
This commit fixes the issue by disabling NAPI only after informing
firmware to stop queueing packets to the host via send_rx_ctrl_cmd(lio, 0).
send_rx_ctrl_cmd is not visible in the patch because it is already present
in the code. The DOWN command also waits for any pending packets to be
processed by NAPI so that the deadlock does not occur.

Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/cavium/liquidio/lio_core.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
drivers/net/ethernet/cavium/liquidio/octeon_network.h

index 73e70e076e61da048bcded35cddc1c93cc8c833c..2a94eee943b255637698efd12190dc74a1a1a47b 100644 (file)
@@ -1146,3 +1146,26 @@ int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
        octeon_free_soft_command(oct, sc);
        return 0;
 }
+
+int lio_wait_for_clean_oq(struct octeon_device *oct)
+{
+       int retry = 100, pending_pkts = 0;
+       int idx;
+
+       do {
+               pending_pkts = 0;
+
+               for (idx = 0; idx < MAX_OCTEON_OUTPUT_QUEUES(oct); idx++) {
+                       if (!(oct->io_qmask.oq & BIT_ULL(idx)))
+                               continue;
+                       pending_pkts +=
+                               atomic_read(&oct->droq[idx]->pkts_pending);
+               }
+
+               if (pending_pkts > 0)
+                       schedule_timeout_uninterruptible(1);
+
+       } while (retry-- && pending_pkts);
+
+       return pending_pkts;
+}
index 43c5ba0af12b0170bd8a87480022e6eb24a3e175..603a144d3d9c2afcbae25f9dcce89d7061da28aa 100644 (file)
@@ -2084,16 +2084,6 @@ static int liquidio_stop(struct net_device *netdev)
        struct octeon_device *oct = lio->oct_dev;
        struct napi_struct *napi, *n;
 
-       if (oct->props[lio->ifidx].napi_enabled) {
-               list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
-                       napi_disable(napi);
-
-               oct->props[lio->ifidx].napi_enabled = 0;
-
-               if (OCTEON_CN23XX_PF(oct))
-                       oct->droq[0]->ops.poll_mode = 0;
-       }
-
        ifstate_reset(lio, LIO_IFSTATE_RUNNING);
 
        netif_tx_disable(netdev);
@@ -2119,6 +2109,21 @@ static int liquidio_stop(struct net_device *netdev)
                lio->ptp_clock = NULL;
        }
 
+       /* Wait for any pending Rx descriptors */
+       if (lio_wait_for_clean_oq(oct))
+               netif_info(lio, rx_err, lio->netdev,
+                          "Proceeding with stop interface after partial RX desc processing\n");
+
+       if (oct->props[lio->ifidx].napi_enabled == 1) {
+               list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+                       napi_disable(napi);
+
+               oct->props[lio->ifidx].napi_enabled = 0;
+
+               if (OCTEON_CN23XX_PF(oct))
+                       oct->droq[0]->ops.poll_mode = 0;
+       }
+
        dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
 
        return 0;
index dc62698bdaf79eb0ce29d6aa4f57194de8a71796..f92dfa411de68627ccb5a59d189d5f21e976ed8c 100644 (file)
@@ -1138,15 +1138,6 @@ static int liquidio_stop(struct net_device *netdev)
        /* tell Octeon to stop forwarding packets to host */
        send_rx_ctrl_cmd(lio, 0);
 
-       if (oct->props[lio->ifidx].napi_enabled) {
-               list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
-                       napi_disable(napi);
-
-               oct->props[lio->ifidx].napi_enabled = 0;
-
-               oct->droq[0]->ops.poll_mode = 0;
-       }
-
        netif_info(lio, ifdown, lio->netdev, "Stopping interface!\n");
        /* Inform that netif carrier is down */
        lio->intf_open = 0;
@@ -1159,6 +1150,20 @@ static int liquidio_stop(struct net_device *netdev)
 
        stop_txqs(netdev);
 
+       /* Wait for any pending Rx descriptors */
+       if (lio_wait_for_clean_oq(oct))
+               netif_info(lio, rx_err, lio->netdev,
+                          "Proceeding with stop interface after partial RX desc processing\n");
+
+       if (oct->props[lio->ifidx].napi_enabled == 1) {
+               list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+                       napi_disable(napi);
+
+               oct->props[lio->ifidx].napi_enabled = 0;
+
+               oct->droq[0]->ops.poll_mode = 0;
+       }
+
        dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
 
        return 0;
index 8782206271b631c04398b492ce289e77a8856822..4069710796a84e2aad17d5ccd6bfeda0867b4d3b 100644 (file)
@@ -190,6 +190,7 @@ irqreturn_t liquidio_msix_intr_handler(int irq __attribute__((unused)),
 
 int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs);
 
+int lio_wait_for_clean_oq(struct octeon_device *oct);
 /**
  * \brief Register ethtool operations
  * @param netdev    pointer to network device