sched: Fix potential near-infinite distribute_cfs_runtime() loop

author Ben Segall <bsegall@google.com>

Fri, 20 Jun 2014 22:21:20 +0000 (15:21 -0700)

committer Ingo Molnar <mingo@kernel.org>

Sat, 5 Jul 2014 09:17:29 +0000 (11:17 +0200)
author Ben Segall <bsegall@google.com>
Fri, 20 Jun 2014 22:21:20 +0000 (15:21 -0700)
committer Ingo Molnar <mingo@kernel.org>
Sat, 5 Jul 2014 09:17:29 +0000 (11:17 +0200)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 1f9c4571615d0772d74bbe90b6e5699f89610747..ef5eac773c7090153a2e22d4b49db9ca109153ac 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3361,7 +3361,11 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
         cfs_rq->throttled = 1;
         cfs_rq->throttled_clock = rq_clock(rq);
         raw_spin_lock(&cfs_b->lock);
-       list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
+       /*
+        * Add to the _head_ of the list, so that an already-started
+        * distribute_cfs_runtime will not see us
+        */
+       list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
         if (!cfs_b->timer_active)
                 __start_cfs_bandwidth(cfs_b, false);
         raw_spin_unlock(&cfs_b->lock);
@@ -3418,7 +3422,8 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
                 u64 remaining, u64 expires)
  {
         struct cfs_rq *cfs_rq;
-       u64 runtime = remaining;
+       u64 runtime;
+       u64 starting_runtime = remaining;
  
         rcu_read_lock();
         list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
@@ -3449,7 +3454,7 @@ next:
         }
         rcu_read_unlock();
  
-       return remaining;
+       return starting_runtime - remaining;
  }
  
  /*
@@ -3495,22 +3500,17 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
         /* account preceding periods in which throttling occurred */
         cfs_b->nr_throttled += overrun;
  
-       /*
-        * There are throttled entities so we must first use the new bandwidth
-        * to unthrottle them before making it generally available.  This
-        * ensures that all existing debts will be paid before a new cfs_rq is
-        * allowed to run.
-        */
-       runtime = cfs_b->runtime;
         runtime_expires = cfs_b->runtime_expires;
-       cfs_b->runtime = 0;
  
         /*
-        * This check is repeated as we are holding onto the new bandwidth
-        * while we unthrottle.  This can potentially race with an unthrottled
-        * group trying to acquire new bandwidth from the global pool.
+        * This check is repeated as we are holding onto the new bandwidth while
+        * we unthrottle. This can potentially race with an unthrottled group
+        * trying to acquire new bandwidth from the global pool. This can result
+        * in us over-using our runtime if it is all used during this loop, but
+        * only by limited amounts in that extreme case.
          */
-       while (throttled && runtime > 0) {
+       while (throttled && cfs_b->runtime > 0) {
+               runtime = cfs_b->runtime;
                 raw_spin_unlock(&cfs_b->lock);
                 /* we can't nest cfs_b->lock while distributing bandwidth */
                 runtime = distribute_cfs_runtime(cfs_b, runtime,
@@ -3518,10 +3518,10 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
                 raw_spin_lock(&cfs_b->lock);
  
                 throttled = !list_empty(&cfs_b->throttled_cfs_rq);
+
+               cfs_b->runtime -= min(runtime, cfs_b->runtime);
         }
  
-       /* return (any) remaining runtime */
-       cfs_b->runtime = runtime;
         /*
          * While we are ensured activity in the period following an
          * unthrottle, this also covers the case in which the new bandwidth is
@@ -3632,10 +3632,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
                 return;
         }
  
-       if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) {
+       if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice)
                 runtime = cfs_b->runtime;
-               cfs_b->runtime = 0;
-       }
+
         expires = cfs_b->runtime_expires;
         raw_spin_unlock(&cfs_b->lock);
  
@@ -3646,7 +3645,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
  
         raw_spin_lock(&cfs_b->lock);
         if (expires == cfs_b->runtime_expires)
-               cfs_b->runtime = runtime;
+               cfs_b->runtime -= min(runtime, cfs_b->runtime);
         raw_spin_unlock(&cfs_b->lock);
  }
author	Ben Segall <bsegall@google.com>
	Fri, 20 Jun 2014 22:21:20 +0000 (15:21 -0700)
committer	Ingo Molnar <mingo@kernel.org>
	Sat, 5 Jul 2014 09:17:29 +0000 (11:17 +0200)