sched: reduce balance-tasks overhead

author Peter Williams <pwil3058@bigpond.net.au>
Wed, 24 Oct 2007 16:23:51 +0000 (18:23 +0200)
committer Ingo Molnar <mingo@elte.hu>
Wed, 24 Oct 2007 16:23:51 +0000 (18:23 +0200)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 52288a647692aa9cb6490c38b239268cf5d5a9f3..639241f4f3d138b14a7e8d7f6319803caab53010 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -829,11 +829,14 @@ struct sched_class {
         void (*put_prev_task) (struct rq *rq, struct task_struct *p);
  
         unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
-                       struct rq *busiest,
-                       unsigned long max_nr_move, unsigned long max_load_move,
+                       struct rq *busiest, unsigned long max_load_move,
                         struct sched_domain *sd, enum cpu_idle_type idle,
                         int *all_pinned, int *this_best_prio);
  
+       int (*move_one_task) (struct rq *this_rq, int this_cpu,
+                             struct rq *busiest, struct sched_domain *sd,
+                             enum cpu_idle_type idle);
+
         void (*set_curr_task) (struct rq *rq);
         void (*task_tick) (struct rq *rq, struct task_struct *p);
         void (*task_new) (struct rq *rq, struct task_struct *p);
diff --git a/kernel/sched.c b/kernel/sched.c

index cc9cd5b710a6b4fb4585f44120b63a7a9407dd52..8607795fad69b6dfc6fe22688943e153e459f6b3 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -838,11 +838,35 @@ struct rq_iterator {
         struct task_struct *(*next)(void *);
  };
  
-static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                     unsigned long max_nr_move, unsigned long max_load_move,
-                     struct sched_domain *sd, enum cpu_idle_type idle,
-                     int *all_pinned, unsigned long *load_moved,
-                     int *this_best_prio, struct rq_iterator *iterator);
+#ifdef CONFIG_SMP
+static unsigned long
+balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+             unsigned long max_load_move, struct sched_domain *sd,
+             enum cpu_idle_type idle, int *all_pinned,
+             int *this_best_prio, struct rq_iterator *iterator);
+
+static int
+iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+                  struct sched_domain *sd, enum cpu_idle_type idle,
+                  struct rq_iterator *iterator);
+#else
+static inline unsigned long
+balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+             unsigned long max_load_move, struct sched_domain *sd,
+             enum cpu_idle_type idle, int *all_pinned,
+             int *this_best_prio, struct rq_iterator *iterator)
+{
+       return 0;
+}
+
+static inline int
+iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+                  struct sched_domain *sd, enum cpu_idle_type idle,
+                  struct rq_iterator *iterator)
+{
+       return 0;
+}
+#endif
  
  #include "sched_stats.h"
  #include "sched_idletask.c"
@@ -2224,17 +2248,17 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
         return 1;
  }
  
-static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                     unsigned long max_nr_move, unsigned long max_load_move,
-                     struct sched_domain *sd, enum cpu_idle_type idle,
-                     int *all_pinned, unsigned long *load_moved,
-                     int *this_best_prio, struct rq_iterator *iterator)
+static unsigned long
+balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+             unsigned long max_load_move, struct sched_domain *sd,
+             enum cpu_idle_type idle, int *all_pinned,
+             int *this_best_prio, struct rq_iterator *iterator)
  {
         int pulled = 0, pinned = 0, skip_for_load;
         struct task_struct *p;
         long rem_load_move = max_load_move;
  
-       if (max_nr_move == 0 || max_load_move == 0)
+       if (max_load_move == 0)
                 goto out;
  
         pinned = 1;
@@ -2267,7 +2291,7 @@ next:
          * We only want to steal up to the prescribed number of tasks
          * and the prescribed amount of weighted load.
          */
-       if (pulled < max_nr_move && rem_load_move > 0) {
+       if (rem_load_move > 0) {
                 if (p->prio < *this_best_prio)
                         *this_best_prio = p->prio;
                 p = iterator->next(iterator->arg);
@@ -2275,7 +2299,7 @@ next:
         }
  out:
         /*
-        * Right now, this is the only place pull_task() is called,
+        * Right now, this is one of only two places pull_task() is called,
          * so we can safely collect pull_task() stats here rather than
          * inside pull_task().
          */
@@ -2283,8 +2307,8 @@ out:
  
         if (all_pinned)
                 *all_pinned = pinned;
-       *load_moved = max_load_move - rem_load_move;
-       return pulled;
+
+       return max_load_move - rem_load_move;
  }
  
  /*
@@ -2306,7 +2330,7 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
         do {
                 total_load_moved +=
                         class->load_balance(this_rq, this_cpu, busiest,
-                               ULONG_MAX, max_load_move - total_load_moved,
+                               max_load_move - total_load_moved,
                                 sd, idle, all_pinned, &this_best_prio);
                 class = class->next;
         } while (class && max_load_move > total_load_moved);
@@ -2314,6 +2338,32 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
         return total_load_moved > 0;
  }
  
+static int
+iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+                  struct sched_domain *sd, enum cpu_idle_type idle,
+                  struct rq_iterator *iterator)
+{
+       struct task_struct *p = iterator->start(iterator->arg);
+       int pinned = 0;
+
+       while (p) {
+               if (can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
+                       pull_task(busiest, p, this_rq, this_cpu);
+                       /*
+                        * Right now, this is one of only two places pull_task()
+                        * is called, so we can safely collect pull_task()
+                        * stats here rather than inside pull_task().
+                        */
+                       schedstat_inc(sd, lb_gained[idle]);
+
+                       return 1;
+               }
+               p = iterator->next(iterator->arg);
+       }
+
+       return 0;
+}
+
  /*
   * move_one_task tries to move exactly one task from busiest to this_rq, as
   * part of active balancing operations within "domain".
@@ -2325,12 +2375,9 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
                          struct sched_domain *sd, enum cpu_idle_type idle)
  {
         const struct sched_class *class;
-       int this_best_prio = MAX_PRIO;
  
         for (class = sched_class_highest; class; class = class->next)
-               if (class->load_balance(this_rq, this_cpu, busiest,
-                                       1, ULONG_MAX, sd, idle, NULL,
-                                       &this_best_prio))
+               if (class->move_one_task(this_rq, this_cpu, busiest, sd, idle))
                         return 1;
  
         return 0;
@@ -3267,18 +3314,6 @@ static inline void idle_balance(int cpu, struct rq *rq)
  {
  }
  
-/* Avoid "used but not defined" warning on UP */
-static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                     unsigned long max_nr_move, unsigned long max_load_move,
-                     struct sched_domain *sd, enum cpu_idle_type idle,
-                     int *all_pinned, unsigned long *load_moved,
-                     int *this_best_prio, struct rq_iterator *iterator)
-{
-       *load_moved = 0;
-
-       return 0;
-}
-
  #endif
  
  DEFINE_PER_CPU(struct kernel_stat, kstat);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c

index 166ed6db600b03490866661f107fa386d10fa181..a90d0457d603b23b6db6c945feaa3a9154b32a37 100644 (file)
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -936,12 +936,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
  
  static unsigned long
  load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                 unsigned long max_nr_move, unsigned long max_load_move,
+                 unsigned long max_load_move,
                   struct sched_domain *sd, enum cpu_idle_type idle,
                   int *all_pinned, int *this_best_prio)
  {
         struct cfs_rq *busy_cfs_rq;
-       unsigned long load_moved, total_nr_moved = 0, nr_moved;
         long rem_load_move = max_load_move;
         struct rq_iterator cfs_rq_iterator;
  
@@ -969,25 +968,47 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
  #else
  # define maxload rem_load_move
  #endif
-               /* pass busy_cfs_rq argument into
+               /*
+                * pass busy_cfs_rq argument into
                  * load_balance_[start|next]_fair iterators
                  */
                 cfs_rq_iterator.arg = busy_cfs_rq;
-               nr_moved = balance_tasks(this_rq, this_cpu, busiest,
-                               max_nr_move, maxload, sd, idle, all_pinned,
-                               &load_moved, this_best_prio, &cfs_rq_iterator);
-
-               total_nr_moved += nr_moved;
-               max_nr_move -= nr_moved;
-               rem_load_move -= load_moved;
+               rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
+                                              maxload, sd, idle, all_pinned,
+                                              this_best_prio,
+                                              &cfs_rq_iterator);
  
-               if (max_nr_move <= 0 || rem_load_move <= 0)
+               if (rem_load_move <= 0)
                         break;
         }
  
         return max_load_move - rem_load_move;
  }
  
+static int
+move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
+                  struct sched_domain *sd, enum cpu_idle_type idle)
+{
+       struct cfs_rq *busy_cfs_rq;
+       struct rq_iterator cfs_rq_iterator;
+
+       cfs_rq_iterator.start = load_balance_start_fair;
+       cfs_rq_iterator.next = load_balance_next_fair;
+
+       for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+               /*
+                * pass busy_cfs_rq argument into
+                * load_balance_[start|next]_fair iterators
+                */
+               cfs_rq_iterator.arg = busy_cfs_rq;
+               if (iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
+                                      &cfs_rq_iterator))
+                   return 1;
+       }
+
+       return 0;
+}
+
  /*
   * scheduler tick hitting a task of our scheduling class:
   */
@@ -1064,6 +1085,7 @@ static const struct sched_class fair_sched_class = {
         .put_prev_task          = put_prev_task_fair,
  
         .load_balance           = load_balance_fair,
+       .move_one_task          = move_one_task_fair,
  
         .set_curr_task          = set_curr_task_fair,
         .task_tick              = task_tick_fair,
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c

index 6e2ead41516ee5bb7631b31e77ecd84f3922eb6a..586b06ca30aa92d1c190fed9f97eaf2230b88267 100644 (file)
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -39,9 +39,16 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
  
  static unsigned long
  load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                       unsigned long max_nr_move, unsigned long max_load_move,
-                       struct sched_domain *sd, enum cpu_idle_type idle,
-                       int *all_pinned, int *this_best_prio)
+                 unsigned long max_load_move,
+                 struct sched_domain *sd, enum cpu_idle_type idle,
+                 int *all_pinned, int *this_best_prio)
+{
+       return 0;
+}
+
+static int
+move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
+                  struct sched_domain *sd, enum cpu_idle_type idle)
  {
         return 0;
  }
@@ -70,6 +77,7 @@ const struct sched_class idle_sched_class = {
         .put_prev_task          = put_prev_task_idle,
  
         .load_balance           = load_balance_idle,
+       .move_one_task          = move_one_task_idle,
  
         .set_curr_task          = set_curr_task_idle,
         .task_tick              = task_tick_idle,
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c

index d0097a0634e54f3dfcce71da006359c6d22d5b01..e9395b7119e6bd24f7a5ba1f252a957e43b1dc93 100644 (file)
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -172,13 +172,11 @@ static struct task_struct *load_balance_next_rt(void *arg)
  
  static unsigned long
  load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                       unsigned long max_nr_move, unsigned long max_load_move,
-                       struct sched_domain *sd, enum cpu_idle_type idle,
-                       int *all_pinned, int *this_best_prio)
+               unsigned long max_load_move,
+               struct sched_domain *sd, enum cpu_idle_type idle,
+               int *all_pinned, int *this_best_prio)
  {
-       int nr_moved;
         struct rq_iterator rt_rq_iterator;
-       unsigned long load_moved;
  
         rt_rq_iterator.start = load_balance_start_rt;
         rt_rq_iterator.next = load_balance_next_rt;
@@ -187,11 +185,22 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
          */
         rt_rq_iterator.arg = busiest;
  
-       nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
-                       max_load_move, sd, idle, all_pinned, &load_moved,
-                       this_best_prio, &rt_rq_iterator);
+       return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd,
+                            idle, all_pinned, this_best_prio, &rt_rq_iterator);
+}
+
+static int
+move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
+                struct sched_domain *sd, enum cpu_idle_type idle)
+{
+       struct rq_iterator rt_rq_iterator;
+
+       rt_rq_iterator.start = load_balance_start_rt;
+       rt_rq_iterator.next = load_balance_next_rt;
+       rt_rq_iterator.arg = busiest;
  
-       return load_moved;
+       return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
+                                 &rt_rq_iterator);
  }
  
  static void task_tick_rt(struct rq *rq, struct task_struct *p)
@@ -237,6 +246,7 @@ const struct sched_class rt_sched_class = {
         .put_prev_task          = put_prev_task_rt,
  
         .load_balance           = load_balance_rt,
+       .move_one_task          = move_one_task_rt,
  
         .set_curr_task          = set_curr_task_rt,
         .task_tick              = task_tick_rt,
author	Peter Williams <pwil3058@bigpond.net.au>
	Wed, 24 Oct 2007 16:23:51 +0000 (18:23 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Wed, 24 Oct 2007 16:23:51 +0000 (18:23 +0200)
include/linux/sched.h		patch \| blob \| history
kernel/sched.c		patch \| blob \| history
kernel/sched_fair.c		patch \| blob \| history
kernel/sched_idletask.c		patch \| blob \| history
kernel/sched_rt.c		patch \| blob \| history