{
struct sched_group *idlest = NULL, *group = sd->groups;
struct sched_group *most_spare_sg = NULL;
- unsigned long min_load = ULONG_MAX, this_load = 0;
+ unsigned long min_runnable_load = ULONG_MAX, this_runnable_load = 0;
+ unsigned long min_avg_load = ULONG_MAX, this_avg_load = 0;
unsigned long most_spare = 0, this_spare = 0;
int load_idx = sd->forkexec_idx;
- int imbalance = 100 + (sd->imbalance_pct-100)/2;
+ int imbalance_scale = 100 + (sd->imbalance_pct-100)/2;
+ unsigned long imbalance = scale_load_down(NICE_0_LOAD) *
+ (sd->imbalance_pct-100) / 100;
if (sd_flag & SD_BALANCE_WAKE)
load_idx = sd->wake_idx;
do {
- unsigned long load, avg_load, spare_cap, max_spare_cap;
+ unsigned long load, avg_load, runnable_load;
+ unsigned long spare_cap, max_spare_cap;
int local_group;
int i;
/*
 * Tally up the load of all CPUs in the group and find
 * the group containing the CPU with most spare capacity.
 */
avg_load = 0;
+ runnable_load = 0;
max_spare_cap = 0;
for_each_cpu(i, sched_group_cpus(group)) {
/* Bias balancing toward cpus of our domain */
if (local_group)
load = source_load(i, load_idx);
else
load = target_load(i, load_idx);
- avg_load += load;
+ runnable_load += load;
+
+ avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
spare_cap = capacity_spare_wake(i, p);

if (spare_cap > max_spare_cap)
max_spare_cap = spare_cap;
}
/* Adjust by relative CPU capacity of the group */
- avg_load = (avg_load * SCHED_CAPACITY_SCALE) / group->sgc->capacity;
+ avg_load = (avg_load * SCHED_CAPACITY_SCALE) /
+ group->sgc->capacity;
+ runnable_load = (runnable_load * SCHED_CAPACITY_SCALE) /
+ group->sgc->capacity;
if (local_group) {
- this_load = avg_load;
+ this_runnable_load = runnable_load;
+ this_avg_load = avg_load;
this_spare = max_spare_cap;
} else {
- if (avg_load < min_load) {
- min_load = avg_load;
+ if (min_runnable_load > (runnable_load + imbalance)) {
+ /*
+ * The runnable load is significantly smaller
+ * so we can pick this new cpu
+ */
+ min_runnable_load = runnable_load;
+ min_avg_load = avg_load;
+ idlest = group;
+ } else if ((runnable_load < (min_runnable_load + imbalance)) &&
+ (100*min_avg_load > imbalance_scale*avg_load)) {
+ /*
+ * The runnable loads are close so take the
+ * blocked load into account through avg_load.
+ */
+ min_avg_load = avg_load;
idlest = group;
}

if (most_spare < max_spare_cap) {
most_spare = max_spare_cap;
most_spare_sg = group;
}
}
} while (group = group->next, group != sd->groups);

if (sd_flag & SD_BALANCE_FORK)
goto skip_spare;
if (this_spare > task_util(p) / 2 &&
- imbalance*this_spare > 100*most_spare)
+ imbalance_scale*this_spare > 100*most_spare)
return NULL;
- else if (most_spare > task_util(p) / 2)
+
+ if (most_spare > task_util(p) / 2)
return most_spare_sg;
skip_spare:
- if (!idlest || 100*this_load < imbalance*min_load)
+ if (!idlest)
+ return NULL;
+
+ if (min_runnable_load > (this_runnable_load + imbalance))
return NULL;
+
+ if ((this_runnable_load < (min_runnable_load + imbalance)) &&
+ (100*this_avg_load < imbalance_scale*min_avg_load))
+ return NULL;
+
return idlest;
}
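
As a concrete, non-kernel illustration of the new selection rule, the sketch below walks two candidate groups through the same two comparisons: a group is taken outright when its runnable load undercuts the current minimum by more than the absolute "imbalance" margin, and when the runnable loads are close, avg_load (which still reflects blocked load) breaks the tie under the "imbalance_scale" percentage check. The group statistics and the margins (assuming imbalance_pct == 125 and scale_load_down(NICE_0_LOAD) == 1024) are made-up values for illustration, not taken from a real system.

#include <stdio.h>

struct group_stats {
	const char *name;
	unsigned long runnable_load;	/* capacity-scaled sum of runnable load */
	unsigned long avg_load;		/* capacity-scaled sum of load_avg, includes blocked load */
};

int main(void)
{
	struct group_stats groups[] = {
		{ "group0",  900, 1200 },	/* slightly less runnable load, lots of blocked load */
		{ "group1",  950,  700 },	/* similar runnable load, much less blocked load */
	};
	unsigned long imbalance = 1024 * (125 - 100) / 100;	/* absolute margin: 256 */
	unsigned long imbalance_scale = 100 + (125 - 100) / 2;	/* percentage margin: 112 */
	unsigned long min_runnable_load = -1UL, min_avg_load = -1UL;
	const char *idlest = NULL;

	for (unsigned long i = 0; i < sizeof(groups) / sizeof(groups[0]); i++) {
		struct group_stats *g = &groups[i];

		if (min_runnable_load > g->runnable_load + imbalance) {
			/* runnable load is lower by more than the margin: take this group */
			min_runnable_load = g->runnable_load;
			min_avg_load = g->avg_load;
			idlest = g->name;
		} else if (g->runnable_load < min_runnable_load + imbalance &&
			   100 * min_avg_load > imbalance_scale * g->avg_load) {
			/* runnable loads are close: let avg_load break the tie */
			min_avg_load = g->avg_load;
			idlest = g->name;
		}
	}

	/* prints "idlest: group1": close on runnable load, clearly less blocked load */
	printf("idlest: %s\n", idlest);
	return 0;
}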