cbq: incorrectly low bandwidth setting blocks limited traffic
author Vasily Averin <vvs@parallels.com>
Thu, 14 Aug 2014 08:27:47 +0000 (12:27 +0400)
committer David S. Miller <davem@davemloft.net>
Tue, 19 Aug 2014 17:58:44 +0000 (10:58 -0700)
Mainstream commit f0f6ee1f70c4 ("cbq: incorrect processing of high limits")
has a side effect: if the cbq bandwidth setting is less than the real interface
throughput, non-limited traffic can delay limited traffic for a very long time.

This happens because q->now is changed incorrectly in cbq_dequeue():
in the described scenario L2T is much greater than the real time delay,
and q->now gets an extra boost for each transmitted packet.

The accumulated boost prevents q->now from being updated, and a blocked class
can wait a very long time until (q->now >= cl->undertime) becomes true again.

To fix the problem, the patch updates q->now on each cbq_update() call.
The L2T-related pre-modification of q->now was moved into cbq_update().

My testing confirmed that it fixes the problem and did not reveal
any side effects.

Fixes: f0f6ee1f70c4 ("cbq: incorrect processing of high limits")
Signed-off-by: Vasily Averin <vvs@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/sched/sch_cbq.c

index ead526467ccae574d6afddb7a5146fc7507e68a9..550be9504fff9c1066804b5faf9e8f85f748f7f7 100644 (file)
@@ -700,8 +700,13 @@ cbq_update(struct cbq_sched_data *q)
        struct cbq_class *this = q->tx_class;
        struct cbq_class *cl = this;
        int len = q->tx_len;
+       psched_time_t now;
 
        q->tx_class = NULL;
+       /* Time integrator. We calculate EOS time
+        * by adding expected packet transmission time.
+        */
+       now = q->now + L2T(&q->link, len);
 
        for ( ; cl; cl = cl->share) {
                long avgidle = cl->avgidle;
@@ -717,7 +722,7 @@ cbq_update(struct cbq_sched_data *q)
                 *      idle = (now - last) - last_pktlen/rate
                 */
 
-               idle = q->now - cl->last;
+               idle = now - cl->last;
                if ((unsigned long)idle > 128*1024*1024) {
                        avgidle = cl->maxidle;
                } else {
@@ -761,7 +766,7 @@ cbq_update(struct cbq_sched_data *q)
                        idle -= L2T(&q->link, len);
                        idle += L2T(cl, len);
 
-                       cl->undertime = q->now + idle;
+                       cl->undertime = now + idle;
                } else {
                        /* Underlimit */
 
@@ -771,7 +776,8 @@ cbq_update(struct cbq_sched_data *q)
                        else
                                cl->avgidle = avgidle;
                }
-               cl->last = q->now;
+               if ((s64)(now - cl->last) > 0)
+                       cl->last = now;
        }
 
        cbq_update_toplevel(q, this, q->tx_borrowed);
@@ -943,30 +949,13 @@ cbq_dequeue(struct Qdisc *sch)
        struct sk_buff *skb;
        struct cbq_sched_data *q = qdisc_priv(sch);
        psched_time_t now;
-       psched_tdiff_t incr;
 
        now = psched_get_time();
-       incr = now - q->now_rt;
-
-       if (q->tx_class) {
-               psched_tdiff_t incr2;
-               /* Time integrator. We calculate EOS time
-                * by adding expected packet transmission time.
-                * If real time is greater, we warp artificial clock,
-                * so that:
-                *
-                * cbq_time = max(real_time, work);
-                */
-               incr2 = L2T(&q->link, q->tx_len);
-               q->now += incr2;
+
+       if (q->tx_class)
                cbq_update(q);
-               if ((incr -= incr2) < 0)
-                       incr = 0;
-               q->now += incr;
-       } else {
-               if (now > q->now)
-                       q->now = now;
-       }
+
+       q->now = now;
        q->now_rt = now;
 
        for (;;) {