[generic] ppp: Fix high softirq utilization with pppoa
authorFlorian Fainelli <florian@openwrt.org>
Tue, 27 Mar 2012 17:47:44 +0000 (17:47 +0000)
committerFlorian Fainelli <florian@openwrt.org>
Tue, 27 Mar 2012 17:47:44 +0000 (17:47 +0000)
Users of the Geos platform are reporting high CPU utilization.
This seems to be rooted in a problem with the TX queue restart in PPP.

Signed-off-by: Philip Prindeville <philipp@redfish-solutions.com>
SVN-Revision: 31096

target/linux/generic/patches-3.2/120-ppp_txqueue_restart.patch [new file with mode: 0644]
target/linux/generic/patches-3.3/120-ppp_txqueue_restart.patch [new file with mode: 0644]

diff --git a/target/linux/generic/patches-3.2/120-ppp_txqueue_restart.patch b/target/linux/generic/patches-3.2/120-ppp_txqueue_restart.patch
new file mode 100644 (file)
index 0000000..83033a1
--- /dev/null
@@ -0,0 +1,82 @@
+For every transmitted packet, ppp_start_xmit() will stop the netdev
+queue and then, if appropriate, restart it. This causes the TX softirq
+to run, entirely gratuitously.
+
+This is "only" a waste of CPU time in the normal case, but it's actively
+harmful when the PPP device is a TEQL slave — the wakeup will cause the
+offending device to receive the next TX packet from the TEQL queue, when
+it *should* have gone to the next slave in the list. We end up seeing
+large bursts of packets on just *one* slave device, rather than using
+the full available bandwidth over all slaves.
+
+This patch fixes the problem by *not* unconditionally stopping the queue
+in ppp_start_xmit(). It adds a return value from ppp_xmit_process()
+which indicates whether the queue should be stopped or not.
+
+It *doesn't* remove the call to netif_wake_queue() from
+ppp_xmit_process(), because other code paths (especially from
+ppp_output_wakeup()) need it there and it's messy to push it out to the
+other callers to do it based on the return value. So we leave it in
+place — it's a no-op in the case where the queue wasn't stopped, so it's
+harmless in the TX path.
+
+Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+
+--- a/drivers/net/ppp/ppp_generic.c~   2012-01-26 00:39:32.000000000 +0000
++++ b/drivers/net/ppp/ppp_generic.c    2012-03-26 10:32:31.286744147 +0100
+@@ -235,7 +235,7 @@ struct ppp_net {
+ /* Prototypes. */
+ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
+                       struct file *file, unsigned int cmd, unsigned long arg);
+-static void ppp_xmit_process(struct ppp *ppp);
++static int ppp_xmit_process(struct ppp *ppp);
+ static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb);
+ static void ppp_push(struct ppp *ppp);
+ static void ppp_channel_push(struct channel *pch);
+@@ -968,9 +968,9 @@ ppp_start_xmit(struct sk_buff *skb, stru
+       proto = npindex_to_proto[npi];
+       put_unaligned_be16(proto, pp);
+-      netif_stop_queue(dev);
+       skb_queue_tail(&ppp->file.xq, skb);
+-      ppp_xmit_process(ppp);
++      if (!ppp_xmit_process(ppp))
++              netif_stop_queue(dev);
+       return NETDEV_TX_OK;
+  outf:
+@@ -1048,10 +1048,11 @@ static void ppp_setup(struct net_device
+  * Called to do any work queued up on the transmit side
+  * that can now be done.
+  */
+-static void
++static int
+ ppp_xmit_process(struct ppp *ppp)
+ {
+       struct sk_buff *skb;
++      int ret = 0;
+       ppp_xmit_lock(ppp);
+       if (!ppp->closing) {
+@@ -1061,10 +1062,13 @@ ppp_xmit_process(struct ppp *ppp)
+                       ppp_send_frame(ppp, skb);
+               /* If there's no work left to do, tell the core net
+                  code that we can accept some more. */
+-              if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq))
++              if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) {
+                       netif_wake_queue(ppp->dev);
++                      ret = 1;
++              }
+       }
+       ppp_xmit_unlock(ppp);
++      return ret;
+ }
+ static inline struct sk_buff *
+
+-- 
+David Woodhouse                            Open Source Technology Centre
+David.Woodhouse@intel.com                              Intel Corporation
+
+
+
diff --git a/target/linux/generic/patches-3.3/120-ppp_txqueue_restart.patch b/target/linux/generic/patches-3.3/120-ppp_txqueue_restart.patch
new file mode 100644 (file)
index 0000000..83033a1
--- /dev/null
@@ -0,0 +1,82 @@
+For every transmitted packet, ppp_start_xmit() will stop the netdev
+queue and then, if appropriate, restart it. This causes the TX softirq
+to run, entirely gratuitously.
+
+This is "only" a waste of CPU time in the normal case, but it's actively
+harmful when the PPP device is a TEQL slave — the wakeup will cause the
+offending device to receive the next TX packet from the TEQL queue, when
+it *should* have gone to the next slave in the list. We end up seeing
+large bursts of packets on just *one* slave device, rather than using
+the full available bandwidth over all slaves.
+
+This patch fixes the problem by *not* unconditionally stopping the queue
+in ppp_start_xmit(). It adds a return value from ppp_xmit_process()
+which indicates whether the queue should be stopped or not.
+
+It *doesn't* remove the call to netif_wake_queue() from
+ppp_xmit_process(), because other code paths (especially from
+ppp_output_wakeup()) need it there and it's messy to push it out to the
+other callers to do it based on the return value. So we leave it in
+place — it's a no-op in the case where the queue wasn't stopped, so it's
+harmless in the TX path.
+
+Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
+
+--- a/drivers/net/ppp/ppp_generic.c~   2012-01-26 00:39:32.000000000 +0000
++++ b/drivers/net/ppp/ppp_generic.c    2012-03-26 10:32:31.286744147 +0100
+@@ -235,7 +235,7 @@ struct ppp_net {
+ /* Prototypes. */
+ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
+                       struct file *file, unsigned int cmd, unsigned long arg);
+-static void ppp_xmit_process(struct ppp *ppp);
++static int ppp_xmit_process(struct ppp *ppp);
+ static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb);
+ static void ppp_push(struct ppp *ppp);
+ static void ppp_channel_push(struct channel *pch);
+@@ -968,9 +968,9 @@ ppp_start_xmit(struct sk_buff *skb, stru
+       proto = npindex_to_proto[npi];
+       put_unaligned_be16(proto, pp);
+-      netif_stop_queue(dev);
+       skb_queue_tail(&ppp->file.xq, skb);
+-      ppp_xmit_process(ppp);
++      if (!ppp_xmit_process(ppp))
++              netif_stop_queue(dev);
+       return NETDEV_TX_OK;
+  outf:
+@@ -1048,10 +1048,11 @@ static void ppp_setup(struct net_device
+  * Called to do any work queued up on the transmit side
+  * that can now be done.
+  */
+-static void
++static int
+ ppp_xmit_process(struct ppp *ppp)
+ {
+       struct sk_buff *skb;
++      int ret = 0;
+       ppp_xmit_lock(ppp);
+       if (!ppp->closing) {
+@@ -1061,10 +1062,13 @@ ppp_xmit_process(struct ppp *ppp)
+                       ppp_send_frame(ppp, skb);
+               /* If there's no work left to do, tell the core net
+                  code that we can accept some more. */
+-              if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq))
++              if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) {
+                       netif_wake_queue(ppp->dev);
++                      ret = 1;
++              }
+       }
+       ppp_xmit_unlock(ppp);
++      return ret;
+ }
+ static inline struct sk_buff *
+
+-- 
+David Woodhouse                            Open Source Technology Centre
+David.Woodhouse@intel.com                              Intel Corporation
+
+
+