net: use listified RX for handling GRO_NORMAL skbs

author Edward Cree <ecree@solarflare.com>

Tue, 6 Aug 2019 13:53:55 +0000 (14:53 +0100)

committer David S. Miller <davem@davemloft.net>

Fri, 9 Aug 2019 01:22:29 +0000 (18:22 -0700)
author Edward Cree <ecree@solarflare.com>
Tue, 6 Aug 2019 13:53:55 +0000 (14:53 +0100)
committer David S. Miller <davem@davemloft.net>
Fri, 9 Aug 2019 01:22:29 +0000 (18:22 -0700)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h

index 88292953aa6fdf1d6713da2c825bf08978051bd8..55ac223553f8cc42803bc6c20341ef1793e58b80 100644 (file)
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -332,6 +332,8 @@ struct napi_struct {
         struct net_device       *dev;
         struct gro_list         gro_hash[GRO_HASH_BUCKETS];
         struct sk_buff          *skb;
+       struct list_head        rx_list; /* Pending GRO_NORMAL skbs */
+       int                     rx_count; /* length of rx_list */
         struct hrtimer          timer;
         struct list_head        dev_list;
         struct hlist_node       napi_hash_node;
@@ -4239,6 +4241,7 @@ extern int                dev_weight_rx_bias;
  extern int             dev_weight_tx_bias;
  extern int             dev_rx_weight;
  extern int             dev_tx_weight;
+extern int             gro_normal_batch;
  
  bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
  struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c

index af071b0ce88e6c53a0ec8c3d6a3941c2e2f091f0..49589ed2018df1d8552afeaa0c18b53f81a0be29 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3963,6 +3963,8 @@ int dev_weight_rx_bias __read_mostly = 1;  /* bias for backlog weight */
  int dev_weight_tx_bias __read_mostly = 1;  /* bias for output_queue quota */
  int dev_rx_weight __read_mostly = 64;
  int dev_tx_weight __read_mostly = 64;
+/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
+int gro_normal_batch __read_mostly = 8;
  
  /* Called with irq disabled */
  static inline void ____napi_schedule(struct softnet_data *sd,
@@ -5747,6 +5749,26 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
  }
  EXPORT_SYMBOL(napi_get_frags);
  
+/* Pass any batched GRO_NORMAL skbs up to the stack, then reset the batch. */
+static void gro_normal_list(struct napi_struct *napi)
+{
+       if (!napi->rx_count)
+               return;
+       netif_receive_skb_list_internal(&napi->rx_list);
+       INIT_LIST_HEAD(&napi->rx_list);
+       napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing.  Once the batch reaches
+ * gro_normal_batch skbs, pass the whole batch up to the stack.
+ */
+static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+{
+       list_add_tail(&skb->list, &napi->rx_list);
+       if (++napi->rx_count >= gro_normal_batch)
+               gro_normal_list(napi);
+}
+
  static gro_result_t napi_frags_finish(struct napi_struct *napi,
                                       struct sk_buff *skb,
                                       gro_result_t ret)
@@ -5756,8 +5778,8 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
         case GRO_HELD:
                 __skb_push(skb, ETH_HLEN);
                 skb->protocol = eth_type_trans(skb, skb->dev);
-               if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
-                       ret = GRO_DROP;
+               if (ret == GRO_NORMAL)
+                       gro_normal_one(napi, skb);
                 break;
  
         case GRO_DROP:
@@ -6034,6 +6056,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
                                  NAPIF_STATE_IN_BUSY_POLL)))
                 return false;
  
+       gro_normal_list(n);
+
         if (n->gro_bitmask) {
                 unsigned long timeout = 0;
  
@@ -6119,10 +6143,19 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
          * Ideally, a new ndo_busy_poll_stop() could avoid another round.
          */
         rc = napi->poll(napi, BUSY_POLL_BUDGET);
+       /* We can't gro_normal_list() here, because napi->poll() might have
+        * rearmed the napi (napi_complete_done()) in which case it could
+        * already be running on another CPU.
+        */
         trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
         netpoll_poll_unlock(have_poll_lock);
-       if (rc == BUSY_POLL_BUDGET)
+       if (rc == BUSY_POLL_BUDGET) {
+               /* As the whole budget was spent, we still own the napi so can
+                * safely handle the rx_list.
+                */
+               gro_normal_list(napi);
                 __napi_schedule(napi);
+       }
         local_bh_enable();
  }
  
@@ -6167,6 +6200,7 @@ restart:
                 }
                 work = napi_poll(napi, BUSY_POLL_BUDGET);
                 trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
+               gro_normal_list(napi);
  count:
                 if (work > 0)
                         __NET_ADD_STATS(dev_net(napi->dev),
@@ -6272,6 +6306,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
         napi->timer.function = napi_watchdog;
         init_gro_hash(napi);
         napi->skb = NULL;
+       INIT_LIST_HEAD(&napi->rx_list);
+       napi->rx_count = 0;
         napi->poll = poll;
         if (weight > NAPI_POLL_WEIGHT)
                 netdev_err_once(dev, "%s() called with weight %d\n", __func__,
@@ -6368,6 +6404,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
                 goto out_unlock;
         }
  
+       gro_normal_list(n);
+
         if (n->gro_bitmask) {
                 /* flush too old packets
                  * If HZ < 1000, flush all packets.
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c

index 8da5b3a54dac40c3eb939408486b6e1036306747..eb29e5adc84daec3ee87157e44b3358a1bae82a3 100644 (file)
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -567,6 +567,14 @@ static struct ctl_table net_core_table[] = {
                 .mode           = 0644,
                 .proc_handler   = proc_do_static_key,
         },
+       {
+               .procname       = "gro_normal_batch",
+               .data           = &gro_normal_batch,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = SYSCTL_ONE,
+       },
         { }
  };
author	Edward Cree <ecree@solarflare.com>
	Tue, 6 Aug 2019 13:53:55 +0000 (14:53 +0100)
committer	David S. Miller <davem@davemloft.net>
	Fri, 9 Aug 2019 01:22:29 +0000 (18:22 -0700)
include/linux/netdevice.h		patch \| blob \| history
net/core/dev.c		patch \| blob \| history
net/core/sysctl_net_core.c		patch \| blob \| history