From: Jo-Philipp Wich Date: Mon, 16 Apr 2012 15:04:44 +0000 (+0000) Subject: Update ppp TX restart patch to avoid race condition X-Git-Tag: reboot~14276 X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=2036c23faa0f6ed2784769d3f4a787dd76a0c520;p=openwrt%2Fstaging%2Fxback.git Update ppp TX restart patch to avoid race condition Signed-off-by: David Woodhouse SVN-Revision: 31312 --- diff --git a/target/linux/generic/patches-3.2/120-ppp_txqueue_restart.patch b/target/linux/generic/patches-3.2/120-ppp_txqueue_restart.patch index 84eb4e41ba..165801cac6 100644 --- a/target/linux/generic/patches-3.2/120-ppp_txqueue_restart.patch +++ b/target/linux/generic/patches-3.2/120-ppp_txqueue_restart.patch @@ -1,75 +1,77 @@ -For every transmitted packet, ppp_start_xmit() will stop the netdev -queue and then, if appropriate, restart it. This causes the TX softirq -to run, entirely gratuitously. +commit 9a5d2bd99e0dfe9a31b3c160073ac445ba3d773f +Author: David Woodhouse +Date: Sun Apr 8 10:01:44 2012 +0000 -This is "only" a waste of CPU time in the normal case, but it's actively -harmful when the PPP device is a TEQL slave — the wakeup will cause the -offending device to receive the next TX packet from the TEQL queue, when -it *should* have gone to the next slave in the list. We end up seeing -large bursts of packets on just *one* slave device, rather than using -the full available bandwidth over all slaves. + ppp: Fix race condition with queue start/stop + + Commit e675f0cc9a872fd152edc0c77acfed19bf28b81e ("ppp: Don't stop and + restart queue on every TX packet") introduced a race condition which + could leave the net queue stopped even when the channel is no longer + busy. By calling netif_stop_queue() from ppp_start_xmit(), based on the + return value from ppp_xmit_process() but *after* all the locks have been + dropped, we could potentially do so *after* the channel has actually + finished transmitting and attempted to re-wake the queue. + + Fix this by moving the netif_stop_queue() into ppp_xmit_process() under + the xmit lock. I hadn't done this previously, because it gets called + from other places than ppp_start_xmit(). But I now think it's the better + option. The net queue *should* be stopped if the channel becomes + congested due to writes from pppd, anyway. + + Signed-off-by: David Woodhouse + Signed-off-by: David S. Miller -This patch fixes the problem by *not* unconditionally stopping the queue -in ppp_start_xmit(). It adds a return value from ppp_xmit_process() -which indicates whether the queue should be stopped or not. +commit e675f0cc9a872fd152edc0c77acfed19bf28b81e +Author: David Woodhouse +Date: Mon Mar 26 00:03:42 2012 +0000 -It *doesn't* remove the call to netif_wake_queue() from -ppp_xmit_process(), because other code paths (especially from -ppp_output_wakeup()) need it there and it's messy to push it out to the -other callers to do it based on the return value. So we leave it in -place — it's a no-op in the case where the queue wasn't stopped, so it's -harmless in the TX path. + ppp: Don't stop and restart queue on every TX packet + + For every transmitted packet, ppp_start_xmit() will stop the netdev + queue and then, if appropriate, restart it. This causes the TX softirq + to run, entirely gratuitously. + + This is "only" a waste of CPU time in the normal case, but it's actively + harmful when the PPP device is a TEQL slave — the wakeup will cause the + offending device to receive the next TX packet from the TEQL queue, when + it *should* have gone to the next slave in the list. We end up seeing + large bursts of packets on just *one* slave device, rather than using + the full available bandwidth over all slaves. + + This patch fixes the problem by *not* unconditionally stopping the queue + in ppp_start_xmit(). It adds a return value from ppp_xmit_process() + which indicates whether the queue should be stopped or not. + + It *doesn't* remove the call to netif_wake_queue() from + ppp_xmit_process(), because other code paths (especially from + ppp_output_wakeup()) need it there and it's messy to push it out to the + other callers to do it based on the return value. So we leave it in + place — it's a no-op in the case where the queue wasn't stopped, so it's + harmless in the TX path. + + Signed-off-by: David Woodhouse + Signed-off-by: David S. Miller -Signed-off-by: David Woodhouse + +diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c +index 159da29..21d7151 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c -@@ -235,7 +235,7 @@ struct ppp_net { - /* Prototypes. */ - static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, - struct file *file, unsigned int cmd, unsigned long arg); --static void ppp_xmit_process(struct ppp *ppp); -+static int ppp_xmit_process(struct ppp *ppp); - static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb); - static void ppp_push(struct ppp *ppp); - static void ppp_channel_push(struct channel *pch); -@@ -968,9 +968,9 @@ ppp_start_xmit(struct sk_buff *skb, stru +@@ -968,7 +968,6 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev) proto = npindex_to_proto[npi]; put_unaligned_be16(proto, pp); - netif_stop_queue(dev); skb_queue_tail(&ppp->file.xq, skb); -- ppp_xmit_process(ppp); -+ if (!ppp_xmit_process(ppp)) -+ netif_stop_queue(dev); + ppp_xmit_process(ppp); return NETDEV_TX_OK; - - outf: -@@ -1048,10 +1048,11 @@ static void ppp_setup(struct net_device - * Called to do any work queued up on the transmit side - * that can now be done. - */ --static void -+static int - ppp_xmit_process(struct ppp *ppp) - { - struct sk_buff *skb; -+ int ret = 0; - - ppp_xmit_lock(ppp); - if (!ppp->closing) { -@@ -1061,10 +1062,13 @@ ppp_xmit_process(struct ppp *ppp) - ppp_send_frame(ppp, skb); - /* If there's no work left to do, tell the core net +@@ -1063,6 +1062,8 @@ ppp_xmit_process(struct ppp *ppp) code that we can accept some more. */ -- if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) -+ if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) { + if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) netif_wake_queue(ppp->dev); -+ ret = 1; -+ } ++ else ++ netif_stop_queue(ppp->dev); } ppp_xmit_unlock(ppp); -+ return ret; } - - static inline struct sk_buff * diff --git a/target/linux/generic/patches-3.3/120-ppp_txqueue_restart.patch b/target/linux/generic/patches-3.3/120-ppp_txqueue_restart.patch index 84eb4e41ba..165801cac6 100644 --- a/target/linux/generic/patches-3.3/120-ppp_txqueue_restart.patch +++ b/target/linux/generic/patches-3.3/120-ppp_txqueue_restart.patch @@ -1,75 +1,77 @@ -For every transmitted packet, ppp_start_xmit() will stop the netdev -queue and then, if appropriate, restart it. This causes the TX softirq -to run, entirely gratuitously. +commit 9a5d2bd99e0dfe9a31b3c160073ac445ba3d773f +Author: David Woodhouse +Date: Sun Apr 8 10:01:44 2012 +0000 -This is "only" a waste of CPU time in the normal case, but it's actively -harmful when the PPP device is a TEQL slave — the wakeup will cause the -offending device to receive the next TX packet from the TEQL queue, when -it *should* have gone to the next slave in the list. We end up seeing -large bursts of packets on just *one* slave device, rather than using -the full available bandwidth over all slaves. + ppp: Fix race condition with queue start/stop + + Commit e675f0cc9a872fd152edc0c77acfed19bf28b81e ("ppp: Don't stop and + restart queue on every TX packet") introduced a race condition which + could leave the net queue stopped even when the channel is no longer + busy. By calling netif_stop_queue() from ppp_start_xmit(), based on the + return value from ppp_xmit_process() but *after* all the locks have been + dropped, we could potentially do so *after* the channel has actually + finished transmitting and attempted to re-wake the queue. + + Fix this by moving the netif_stop_queue() into ppp_xmit_process() under + the xmit lock. I hadn't done this previously, because it gets called + from other places than ppp_start_xmit(). But I now think it's the better + option. The net queue *should* be stopped if the channel becomes + congested due to writes from pppd, anyway. + + Signed-off-by: David Woodhouse + Signed-off-by: David S. Miller -This patch fixes the problem by *not* unconditionally stopping the queue -in ppp_start_xmit(). It adds a return value from ppp_xmit_process() -which indicates whether the queue should be stopped or not. +commit e675f0cc9a872fd152edc0c77acfed19bf28b81e +Author: David Woodhouse +Date: Mon Mar 26 00:03:42 2012 +0000 -It *doesn't* remove the call to netif_wake_queue() from -ppp_xmit_process(), because other code paths (especially from -ppp_output_wakeup()) need it there and it's messy to push it out to the -other callers to do it based on the return value. So we leave it in -place — it's a no-op in the case where the queue wasn't stopped, so it's -harmless in the TX path. + ppp: Don't stop and restart queue on every TX packet + + For every transmitted packet, ppp_start_xmit() will stop the netdev + queue and then, if appropriate, restart it. This causes the TX softirq + to run, entirely gratuitously. + + This is "only" a waste of CPU time in the normal case, but it's actively + harmful when the PPP device is a TEQL slave — the wakeup will cause the + offending device to receive the next TX packet from the TEQL queue, when + it *should* have gone to the next slave in the list. We end up seeing + large bursts of packets on just *one* slave device, rather than using + the full available bandwidth over all slaves. + + This patch fixes the problem by *not* unconditionally stopping the queue + in ppp_start_xmit(). It adds a return value from ppp_xmit_process() + which indicates whether the queue should be stopped or not. + + It *doesn't* remove the call to netif_wake_queue() from + ppp_xmit_process(), because other code paths (especially from + ppp_output_wakeup()) need it there and it's messy to push it out to the + other callers to do it based on the return value. So we leave it in + place — it's a no-op in the case where the queue wasn't stopped, so it's + harmless in the TX path. + + Signed-off-by: David Woodhouse + Signed-off-by: David S. Miller -Signed-off-by: David Woodhouse + +diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c +index 159da29..21d7151 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c -@@ -235,7 +235,7 @@ struct ppp_net { - /* Prototypes. */ - static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, - struct file *file, unsigned int cmd, unsigned long arg); --static void ppp_xmit_process(struct ppp *ppp); -+static int ppp_xmit_process(struct ppp *ppp); - static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb); - static void ppp_push(struct ppp *ppp); - static void ppp_channel_push(struct channel *pch); -@@ -968,9 +968,9 @@ ppp_start_xmit(struct sk_buff *skb, stru +@@ -968,7 +968,6 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev) proto = npindex_to_proto[npi]; put_unaligned_be16(proto, pp); - netif_stop_queue(dev); skb_queue_tail(&ppp->file.xq, skb); -- ppp_xmit_process(ppp); -+ if (!ppp_xmit_process(ppp)) -+ netif_stop_queue(dev); + ppp_xmit_process(ppp); return NETDEV_TX_OK; - - outf: -@@ -1048,10 +1048,11 @@ static void ppp_setup(struct net_device - * Called to do any work queued up on the transmit side - * that can now be done. - */ --static void -+static int - ppp_xmit_process(struct ppp *ppp) - { - struct sk_buff *skb; -+ int ret = 0; - - ppp_xmit_lock(ppp); - if (!ppp->closing) { -@@ -1061,10 +1062,13 @@ ppp_xmit_process(struct ppp *ppp) - ppp_send_frame(ppp, skb); - /* If there's no work left to do, tell the core net +@@ -1063,6 +1062,8 @@ ppp_xmit_process(struct ppp *ppp) code that we can accept some more. */ -- if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) -+ if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) { + if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) netif_wake_queue(ppp->dev); -+ ret = 1; -+ } ++ else ++ netif_stop_queue(ppp->dev); } ppp_xmit_unlock(ppp); -+ return ret; } - - static inline struct sk_buff *