PM / Domains: Fix asynchronous execution of *noirq() callbacks
authorUlf Hansson <ulf.hansson@linaro.org>
Wed, 8 Feb 2017 12:39:00 +0000 (13:39 +0100)
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>
Thu, 9 Feb 2017 00:01:26 +0000 (01:01 +0100)
As the PM core may invoke the *noirq() callbacks asynchronously, the
current lock-less approach in genpd doesn't work. The consequence is that
we may find concurrent operations racing to power on/off the PM domain.

As of now, no immediate errors has been reported, but it's probably only a
matter time. Therefor let's fix the problem now before this becomes a real
issue, by deploying the locking scheme to the relevant functions.

Reported-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
drivers/base/power/domain.c

index b04c14f2b4406967ef6b7f514accb560bd9fa4be..3a75fb1b4126f04c934bfa6a37cbe8dbac48fe10 100644 (file)
@@ -729,16 +729,18 @@ static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd,
 /**
  * genpd_sync_power_off - Synchronously power off a PM domain and its masters.
  * @genpd: PM domain to power off, if possible.
+ * @use_lock: use the lock.
+ * @depth: nesting count for lockdep.
  *
  * Check if the given PM domain can be powered off (during system suspend or
  * hibernation) and do that if so.  Also, in that case propagate to its masters.
  *
  * This function is only called in "noirq" and "syscore" stages of system power
- * transitions, so it need not acquire locks (all of the "noirq" callbacks are
- * executed sequentially, so it is guaranteed that it will never run twice in
- * parallel).
+ * transitions. The "noirq" callbacks may be executed asynchronously, thus in
+ * these cases the lock must be held.
  */
-static void genpd_sync_power_off(struct generic_pm_domain *genpd)
+static void genpd_sync_power_off(struct generic_pm_domain *genpd, bool use_lock,
+                                unsigned int depth)
 {
        struct gpd_link *link;
 
@@ -757,20 +759,29 @@ static void genpd_sync_power_off(struct generic_pm_domain *genpd)
 
        list_for_each_entry(link, &genpd->slave_links, slave_node) {
                genpd_sd_counter_dec(link->master);
-               genpd_sync_power_off(link->master);
+
+               if (use_lock)
+                       genpd_lock_nested(link->master, depth + 1);
+
+               genpd_sync_power_off(link->master, use_lock, depth + 1);
+
+               if (use_lock)
+                       genpd_unlock(link->master);
        }
 }
 
 /**
  * genpd_sync_power_on - Synchronously power on a PM domain and its masters.
  * @genpd: PM domain to power on.
+ * @use_lock: use the lock.
+ * @depth: nesting count for lockdep.
  *
  * This function is only called in "noirq" and "syscore" stages of system power
- * transitions, so it need not acquire locks (all of the "noirq" callbacks are
- * executed sequentially, so it is guaranteed that it will never run twice in
- * parallel).
+ * transitions. The "noirq" callbacks may be executed asynchronously, thus in
+ * these cases the lock must be held.
  */
-static void genpd_sync_power_on(struct generic_pm_domain *genpd)
+static void genpd_sync_power_on(struct generic_pm_domain *genpd, bool use_lock,
+                               unsigned int depth)
 {
        struct gpd_link *link;
 
@@ -778,8 +789,15 @@ static void genpd_sync_power_on(struct generic_pm_domain *genpd)
                return;
 
        list_for_each_entry(link, &genpd->slave_links, slave_node) {
-               genpd_sync_power_on(link->master);
                genpd_sd_counter_inc(link->master);
+
+               if (use_lock)
+                       genpd_lock_nested(link->master, depth + 1);
+
+               genpd_sync_power_on(link->master, use_lock, depth + 1);
+
+               if (use_lock)
+                       genpd_unlock(link->master);
        }
 
        _genpd_power_on(genpd, false);
@@ -888,13 +906,10 @@ static int pm_genpd_suspend_noirq(struct device *dev)
                        return ret;
        }
 
-       /*
-        * Since all of the "noirq" callbacks are executed sequentially, it is
-        * guaranteed that this function will never run twice in parallel for
-        * the same PM domain, so it is not necessary to use locking here.
-        */
+       genpd_lock(genpd);
        genpd->suspended_count++;
-       genpd_sync_power_off(genpd);
+       genpd_sync_power_off(genpd, true, 0);
+       genpd_unlock(genpd);
 
        return 0;
 }
@@ -919,13 +934,10 @@ static int pm_genpd_resume_noirq(struct device *dev)
        if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev))
                return 0;
 
-       /*
-        * Since all of the "noirq" callbacks are executed sequentially, it is
-        * guaranteed that this function will never run twice in parallel for
-        * the same PM domain, so it is not necessary to use locking here.
-        */
-       genpd_sync_power_on(genpd);
+       genpd_lock(genpd);
+       genpd_sync_power_on(genpd, true, 0);
        genpd->suspended_count--;
+       genpd_unlock(genpd);
 
        if (genpd->dev_ops.stop && genpd->dev_ops.start)
                ret = pm_runtime_force_resume(dev);
@@ -1002,13 +1014,10 @@ static int pm_genpd_restore_noirq(struct device *dev)
                return -EINVAL;
 
        /*
-        * Since all of the "noirq" callbacks are executed sequentially, it is
-        * guaranteed that this function will never run twice in parallel for
-        * the same PM domain, so it is not necessary to use locking here.
-        *
         * At this point suspended_count == 0 means we are being run for the
         * first time for the given domain in the present cycle.
         */
+       genpd_lock(genpd);
        if (genpd->suspended_count++ == 0)
                /*
                 * The boot kernel might put the domain into arbitrary state,
@@ -1017,7 +1026,8 @@ static int pm_genpd_restore_noirq(struct device *dev)
                 */
                genpd->status = GPD_STATE_POWER_OFF;
 
-       genpd_sync_power_on(genpd);
+       genpd_sync_power_on(genpd, true, 0);
+       genpd_unlock(genpd);
 
        if (genpd->dev_ops.stop && genpd->dev_ops.start)
                ret = pm_runtime_force_resume(dev);
@@ -1072,9 +1082,9 @@ static void genpd_syscore_switch(struct device *dev, bool suspend)
 
        if (suspend) {
                genpd->suspended_count++;
-               genpd_sync_power_off(genpd);
+               genpd_sync_power_off(genpd, false, 0);
        } else {
-               genpd_sync_power_on(genpd);
+               genpd_sync_power_on(genpd, false, 0);
                genpd->suspended_count--;
        }
 }