From: Zoltan Kiss Date: Thu, 6 Mar 2014 21:48:30 +0000 (+0000) Subject: xen-netback: Timeout packets in RX path X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=093507885ae5dc0288af07fbb922d2f85b3a88a6;p=openwrt%2Fstaging%2Fblogic.git xen-netback: Timeout packets in RX path A malicious or buggy guest can leave its queue filled indefinitely, in which case qdisc start to queue packets for that VIF. If those packets came from an another guest, it can block its slots and prevent shutdown. To avoid that, we make sure the queue is drained in every 10 seconds. The QDisc queue in worst case takes 3 round to flush usually. Signed-off-by: Zoltan Kiss Signed-off-by: David S. Miller --- diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index f2f8a02afc36..0355f8767e3b 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -148,6 +148,9 @@ struct xenvif { struct xen_netif_rx_back_ring rx; struct sk_buff_head rx_queue; RING_IDX rx_last_skb_slots; + bool rx_queue_purge; + + struct timer_list wake_queue; /* This array is allocated seperately as it is large */ struct gnttab_copy *grant_copy_op; @@ -259,4 +262,7 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success); extern bool separate_tx_rx_irq; +extern unsigned int rx_drain_timeout_msecs; +extern unsigned int rx_drain_timeout_jiffies; + #endif /* __XEN_NETBACK__COMMON_H__ */ diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index b646039e539b..9cc9f638f442 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -115,6 +115,18 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static void xenvif_wake_queue(unsigned long data) +{ + struct xenvif *vif = (struct xenvif *)data; + + if (netif_queue_stopped(vif->dev)) { + netdev_err(vif->dev, "draining TX queue\n"); + vif->rx_queue_purge = true; + xenvif_kick_thread(vif); + netif_wake_queue(vif->dev); + } +} + static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); @@ -144,8 +156,13 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) * then turn off the queue to give the ring a chance to * drain. */ - if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) + if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) { + vif->wake_queue.function = xenvif_wake_queue; + vif->wake_queue.data = (unsigned long)vif; xenvif_stop_queue(vif); + mod_timer(&vif->wake_queue, + jiffies + rx_drain_timeout_jiffies); + } skb_queue_tail(&vif->rx_queue, skb); xenvif_kick_thread(vif); @@ -353,6 +370,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, init_timer(&vif->credit_timeout); vif->credit_window_start = get_jiffies_64(); + init_timer(&vif->wake_queue); + dev->netdev_ops = &xenvif_netdev_ops; dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | @@ -531,6 +550,7 @@ void xenvif_disconnect(struct xenvif *vif) xenvif_carrier_off(vif); if (vif->task) { + del_timer_sync(&vif->wake_queue); kthread_stop(vif->task); vif->task = NULL; } @@ -556,12 +576,25 @@ void xenvif_disconnect(struct xenvif *vif) void xenvif_free(struct xenvif *vif) { int i, unmap_timeout = 0; + /* Here we want to avoid timeout messages if an skb can be legitimatly + * stucked somewhere else. Realisticly this could be an another vif's + * internal or QDisc queue. That another vif also has this + * rx_drain_timeout_msecs timeout, but the timer only ditches the + * internal queue. After that, the QDisc queue can put in worst case + * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that another vif's + * internal queue, so we need several rounds of such timeouts until we + * can be sure that no another vif should have skb's from us. We are + * not sending more skb's, so newly stucked packets are not interesting + * for us here. + */ + unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) * + DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS)); for (i = 0; i < MAX_PENDING_REQS; ++i) { if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) { unmap_timeout++; schedule_timeout(msecs_to_jiffies(1000)); - if (unmap_timeout > 9 && + if (unmap_timeout > worst_case_skb_lifetime && net_ratelimit()) netdev_err(vif->dev, "Page still granted! Index: %x\n", diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 58effc49f526..8518a0d1f6f9 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -55,6 +55,13 @@ bool separate_tx_rx_irq = 1; module_param(separate_tx_rx_irq, bool, 0644); +/* When guest ring is filled up, qdisc queues the packets for us, but we have + * to timeout them, otherwise other guests' packets can get stucked there + */ +unsigned int rx_drain_timeout_msecs = 10000; +module_param(rx_drain_timeout_msecs, uint, 0444); +unsigned int rx_drain_timeout_jiffies; + /* * This is the maximum slots a skb can have. If a guest sends a skb * which exceeds this limit it is considered malicious. @@ -1694,8 +1701,9 @@ void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx) static inline int rx_work_todo(struct xenvif *vif) { - return !skb_queue_empty(&vif->rx_queue) && - xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots); + return (!skb_queue_empty(&vif->rx_queue) && + xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots)) || + vif->rx_queue_purge; } static inline int tx_work_todo(struct xenvif *vif) @@ -1782,12 +1790,19 @@ int xenvif_kthread_guest_rx(void *data) if (kthread_should_stop()) break; + if (vif->rx_queue_purge) { + skb_queue_purge(&vif->rx_queue); + vif->rx_queue_purge = false; + } + if (!skb_queue_empty(&vif->rx_queue)) xenvif_rx_action(vif); if (skb_queue_empty(&vif->rx_queue) && - netif_queue_stopped(vif->dev)) + netif_queue_stopped(vif->dev)) { + del_timer_sync(&vif->wake_queue); xenvif_start_queue(vif); + } cond_resched(); } @@ -1838,6 +1853,8 @@ static int __init netback_init(void) if (rc) goto failed_init; + rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs); + return 0; failed_init: