sched/deadline: Fix bandwidth accounting at all levels after offline migration
authorJuri Lelli <juri.lelli@redhat.com>
Fri, 19 Jul 2019 13:59:56 +0000 (15:59 +0200)
committerIngo Molnar <mingo@kernel.org>
Thu, 25 Jul 2019 13:55:02 +0000 (15:55 +0200)
If a task happens to be throttled while the CPU it was running on gets
hotplugged off, the bandwidth associated with the task is not correctly
migrated with it when the replenishment timer fires (offline_migration).

Fix up this_bw, running_bw and total_bw accounting when the replenishment
timer fires and the task is migrated (dl_task_offline_migration()).

Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bristot@redhat.com
Cc: claudio@evidence.eu.com
Cc: lizefan@huawei.com
Cc: longman@redhat.com
Cc: luca.abeni@santannapisa.it
Cc: mathieu.poirier@linaro.org
Cc: rostedt@goodmis.org
Cc: tj@kernel.org
Cc: tommaso.cucinotta@santannapisa.it
Link: https://lkml.kernel.org/r/20190719140000.31694-5-juri.lelli@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/sched/deadline.c

index 0f9d2180be237bd48ae5f808a476f77afc81f5e5..039dde2b1dacc10234d71db8f0490ba6a6f5e2d3 100644 (file)
@@ -529,6 +529,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
 static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
 {
        struct rq *later_rq = NULL;
+       struct dl_bw *dl_b;
 
        later_rq = find_lock_later_rq(p, rq);
        if (!later_rq) {
@@ -557,6 +558,38 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
                double_lock_balance(rq, later_rq);
        }
 
+       if (p->dl.dl_non_contending || p->dl.dl_throttled) {
+               /*
+                * Inactive timer is armed (or callback is running, but
+                * waiting for us to release rq locks). In any case, when it
+                * will fire (or continue), it will see running_bw of this
+                * task migrated to later_rq (and correctly handle it).
+                */
+               sub_running_bw(&p->dl, &rq->dl);
+               sub_rq_bw(&p->dl, &rq->dl);
+
+               add_rq_bw(&p->dl, &later_rq->dl);
+               add_running_bw(&p->dl, &later_rq->dl);
+       } else {
+               sub_rq_bw(&p->dl, &rq->dl);
+               add_rq_bw(&p->dl, &later_rq->dl);
+       }
+
+       /*
+        * And we finally need to fixup root_domain(s) bandwidth accounting,
+        * since p is still hanging out in the old (now moved to default) root
+        * domain.
+        */
+       dl_b = &rq->rd->dl_bw;
+       raw_spin_lock(&dl_b->lock);
+       __dl_sub(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
+       raw_spin_unlock(&dl_b->lock);
+
+       dl_b = &later_rq->rd->dl_bw;
+       raw_spin_lock(&dl_b->lock);
+       __dl_add(dl_b, p->dl.dl_bw, cpumask_weight(later_rq->rd->span));
+       raw_spin_unlock(&dl_b->lock);
+
        set_task_cpu(p, later_rq->cpu);
        double_unlock_balance(later_rq, rq);