ice: Only bump Rx tail and release buffers once per napi_poll

author Brett Creeley <brett.creeley@intel.com>

Wed, 26 Jun 2019 09:20:19 +0000 (02:20 -0700)

committer Jeff Kirsher <jeffrey.t.kirsher@intel.com>

Wed, 31 Jul 2019 20:40:30 +0000 (13:40 -0700)
author Brett Creeley <brett.creeley@intel.com>
Wed, 26 Jun 2019 09:20:19 +0000 (02:20 -0700)
committer Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Wed, 31 Jul 2019 20:40:30 +0000 (13:40 -0700)
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c

index dd7392f293bf53aa37badb5be6954ab2b76c1016..0c459305c12f60776916c69afc1e42a56e14837f 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -377,18 +377,28 @@ err:
   */
  static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
  {
+       u16 prev_ntu = rx_ring->next_to_use;
+
         rx_ring->next_to_use = val;
  
         /* update next to alloc since we have filled the ring */
         rx_ring->next_to_alloc = val;
  
-       /* Force memory writes to complete before letting h/w
-        * know there are new descriptors to fetch. (Only
-        * applicable for weak-ordered memory model archs,
-        * such as IA-64).
+       /* QRX_TAIL will be updated with any tail value, but hardware ignores
+        * the lower 3 bits. This makes it so we only bump tail on meaningful
+        * boundaries. Also, this allows us to bump tail on intervals of 8 up to
+        * the budget depending on the current traffic load.
          */
-       wmb();
-       writel(val, rx_ring->tail);
+       val &= ~0x7;
+       if (prev_ntu != val) {
+               /* Force memory writes to complete before letting h/w
+                * know there are new descriptors to fetch. (Only
+                * applicable for weak-ordered memory model archs,
+                * such as IA-64).
+                */
+               wmb();
+               writel(val, rx_ring->tail);
+       }
  }
  
  /**
@@ -445,7 +455,13 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
   * @rx_ring: ring to place buffers on
   * @cleaned_count: number of buffers to replace
   *
- * Returns false if all allocations were successful, true if any fail
+ * Returns false if all allocations were successful, true if any fail. Returning
+ * true signals to the caller that we didn't replace cleaned_count buffers and
+ * there is more work to do.
+ *
+ * First, try to clean "cleaned_count" Rx buffers. Then refill the cleaned Rx
+ * buffers. Then bump tail at most one time. Grouping like this lets us avoid
+ * multiple tail writes per call.
   */
  bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
  {
@@ -990,7 +1006,7 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
  {
         unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
         u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
-       bool failure = false;
+       bool failure;
  
         /* start the loop to process Rx packets bounded by 'budget' */
         while (likely(total_rx_pkts < (unsigned int)budget)) {
@@ -1002,13 +1018,6 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
                 u16 vlan_tag = 0;
                 u8 rx_ptype;
  
-               /* return some buffers to hardware, one at a time is too slow */
-               if (cleaned_count >= ICE_RX_BUF_WRITE) {
-                       failure = failure ||
-                                 ice_alloc_rx_bufs(rx_ring, cleaned_count);
-                       cleaned_count = 0;
-               }
-
                 /* get the Rx desc from Rx ring based on 'next_to_clean' */
                 rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
  
@@ -1085,6 +1094,9 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
                 total_rx_pkts++;
         }
  
+       /* return up to cleaned_count buffers to hardware */
+       failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
+
         /* update queue and vector specific stats */
         u64_stats_update_begin(&rx_ring->syncp);
         rx_ring->stats.pkts += total_rx_pkts;
author	Brett Creeley <brett.creeley@intel.com>
	Wed, 26 Jun 2019 09:20:19 +0000 (02:20 -0700)
committer	Jeff Kirsher <jeffrey.t.kirsher@intel.com>
	Wed, 31 Jul 2019 20:40:30 +0000 (13:40 -0700)