From c9819f4593e8d052b41a89f47140f5c5e7e30582 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Sun, 10 Dec 2006 02:20:25 -0800
Subject: [PATCH] [PATCH] sched: use softirq for load balancing

Call rebalance_tick (renamed to run_rebalance_domains) from a newly introduced
softirq.

We calculate the earliest time for each layer of sched domains to be rescanned
(this is the rescan time for idle) and use the earliest of those to schedule
the softirq via a new field "next_balance" added to struct rq.

Signed-off-by: Christoph Lameter
Cc: Peter Williams
Cc: Nick Piggin
Cc: Christoph Lameter
Cc: "Siddha, Suresh B"
Cc: "Chen, Kenneth W"
Acked-by: Ingo Molnar
Cc: KAMEZAWA Hiroyuki
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/interrupt.h |  3 ++-
 kernel/sched.c            | 22 +++++++++++++++++-----
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index de7593f4e895..e36e86c869fb 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -231,7 +231,8 @@ enum
 	NET_TX_SOFTIRQ,
 	NET_RX_SOFTIRQ,
 	BLOCK_SOFTIRQ,
-	TASKLET_SOFTIRQ
+	TASKLET_SOFTIRQ,
+	SCHED_SOFTIRQ,
 };
 
 /* softirq mask and active fields moved to irq_cpustat_t in
diff --git a/kernel/sched.c b/kernel/sched.c
index 14a8d9050cd4..0a3e748d737d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -227,6 +227,7 @@ struct rq {
 	unsigned long expired_timestamp;
 	unsigned long long timestamp_last_tick;
 	struct task_struct *curr, *idle;
+	unsigned long next_balance;
 	struct mm_struct *prev_mm;
 	struct prio_array *active, *expired, arrays[2];
 	int best_expired_prio;
@@ -2858,7 +2859,7 @@ static void update_load(struct rq *this_rq)
 }
 
 /*
- * rebalance_tick will get called every timer tick, on every CPU.
+ * run_rebalance_domains is triggered when needed from the scheduler tick.
  *
  * It checks each scheduling domain to see if it is due to be balanced,
  * and initiates a balancing operation if so.
@@ -2866,9 +2867,10 @@ static void update_load(struct rq *this_rq)
  * Balancing parameters are set up in arch_init_sched_domains.
  */
 
-static void
-rebalance_tick(int this_cpu, struct rq *this_rq)
+static void run_rebalance_domains(struct softirq_action *h)
 {
+	int this_cpu = smp_processor_id();
+	struct rq *this_rq = cpu_rq(this_cpu);
 	unsigned long interval;
 	struct sched_domain *sd;
 	/*
@@ -2877,6 +2879,8 @@ rebalance_tick(int this_cpu, struct rq *this_rq)
 	 */
 	enum idle_type idle = !this_rq->nr_running ?
 				SCHED_IDLE : NOT_IDLE;
+	/* Earliest time when we have to call run_rebalance_domains again */
+	unsigned long next_balance = jiffies + 60*HZ;
 
 	for_each_domain(this_cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -2891,7 +2895,7 @@ rebalance_tick(int this_cpu, struct rq *this_rq)
 		if (unlikely(!interval))
 			interval = 1;
 
-		if (jiffies - sd->last_balance >= interval) {
+		if (time_after_eq(jiffies, sd->last_balance + interval)) {
 			if (load_balance(this_cpu, this_rq, sd, idle)) {
 				/*
 				 * We've pulled tasks over so either we're no
@@ -2902,7 +2906,10 @@ rebalance_tick(int this_cpu, struct rq *this_rq)
 			}
 			sd->last_balance += interval;
 		}
+		if (time_after(next_balance, sd->last_balance + interval))
+			next_balance = sd->last_balance + interval;
 	}
+	this_rq->next_balance = next_balance;
 }
 #else
 /*
@@ -3155,7 +3162,8 @@ void scheduler_tick(void)
 		task_running_tick(rq, p);
 #ifdef CONFIG_SMP
 	update_load(rq);
-	rebalance_tick(cpu, rq);
+	if (time_after_eq(jiffies, rq->next_balance))
+		raise_softirq(SCHED_SOFTIRQ);
 #endif
 }
 
@@ -6859,6 +6867,10 @@ void __init sched_init(void)
 
 	set_load_weight(&init_task);
 
+#ifdef CONFIG_SMP
+	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
+#endif
+
 #ifdef CONFIG_RT_MUTEXES
 	plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
 #endif
-- 
2.30.2