static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS];
static DEFINE_SPINLOCK(cpufreq_driver_lock);
+/*
+ * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
+ * all cpufreq/hotplug/workqueue/etc related lock issues.
+ *
+ * The rules for this semaphore:
+ * - Any routine that wants to read from the policy structure will
+ * do a down_read on this semaphore.
+ * - Any routine that will write to the policy structure and/or may take away
+ * the policy altogether (eg. CPU hotplug), will hold this lock in write
+ * mode before doing so.
+ *
+ * Additional rules:
+ * - All holders of the lock should check to make sure that the CPU they
+ * are concerned with are online after they get the lock.
+ * - Governor routines that can be called in cpufreq hotplug path should not
+ * take this sem as top level hotplug notifier handler takes this.
+ */
+static DEFINE_PER_CPU(int, policy_cpu);
+static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
+
+#define lock_policy_rwsem(mode, cpu) \
+int lock_policy_rwsem_##mode \
+(int cpu) \
+{ \
+ int policy_cpu = per_cpu(policy_cpu, cpu); \
+ BUG_ON(policy_cpu == -1); \
+ down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \
+ if (unlikely(!cpu_online(cpu))) { \
+ up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \
+ return -1; \
+ } \
+ \
+ return 0; \
+}
+
+lock_policy_rwsem(read, cpu);
+EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);
+
+lock_policy_rwsem(write, cpu);
+EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
+
+void unlock_policy_rwsem_read(int cpu)
+{
+ int policy_cpu = per_cpu(policy_cpu, cpu);
+ BUG_ON(policy_cpu == -1);
+ up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
+}
+EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);
+
+void unlock_policy_rwsem_write(int cpu)
+{
+ int policy_cpu = per_cpu(policy_cpu, cpu);
+ BUG_ON(policy_cpu == -1);
+ up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
+}
+EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);
+
+
/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
+static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);
/**
if (ret != 1) \
return -EINVAL; \
\
- mutex_lock(&policy->lock); \
ret = __cpufreq_set_policy(policy, &new_policy); \
policy->user_policy.object = policy->object; \
- mutex_unlock(&policy->lock); \
\
return ret ? ret : count; \
}
static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy,
char *buf)
{
- unsigned int cur_freq = cpufreq_get(policy->cpu);
+ unsigned int cur_freq = __cpufreq_get(policy->cpu);
if (!cur_freq)
return sprintf(buf, "<unknown>");
return sprintf(buf, "%u\n", cur_freq);
/* Do not use cpufreq_set_policy here or the user_policy.max
will be wrongly overridden */
- mutex_lock(&policy->lock);
ret = __cpufreq_set_policy(policy, &new_policy);
policy->user_policy.policy = policy->policy;
policy->user_policy.governor = policy->governor;
- mutex_unlock(&policy->lock);
if (ret)
return ret;
policy = cpufreq_cpu_get(policy->cpu);
if (!policy)
return -EINVAL;
+
+ if (lock_policy_rwsem_read(policy->cpu) < 0)
+ return -EINVAL;
+
if (fattr->show)
ret = fattr->show(policy, buf);
else
ret = -EIO;
+ unlock_policy_rwsem_read(policy->cpu);
+
cpufreq_cpu_put(policy);
return ret;
}
policy = cpufreq_cpu_get(policy->cpu);
if (!policy)
return -EINVAL;
+
+ if (lock_policy_rwsem_write(policy->cpu) < 0)
+ return -EINVAL;
+
if (fattr->store)
ret = fattr->store(policy, buf, count);
else
ret = -EIO;
+ unlock_policy_rwsem_write(policy->cpu);
+
cpufreq_cpu_put(policy);
return ret;
}
policy->cpu = cpu;
policy->cpus = cpumask_of_cpu(cpu);
- mutex_init(&policy->lock);
- mutex_lock(&policy->lock);
+ /* Initially set CPU itself as the policy_cpu */
+ per_cpu(policy_cpu, cpu) = cpu;
+ lock_policy_rwsem_write(cpu);
+
init_completion(&policy->kobj_unregister);
INIT_WORK(&policy->update, handle_update);
ret = cpufreq_driver->init(policy);
if (ret) {
dprintk("initialization failed\n");
- mutex_unlock(&policy->lock);
+ unlock_policy_rwsem_write(cpu);
goto err_out;
}
*/
managed_policy = cpufreq_cpu_get(j);
if (unlikely(managed_policy)) {
+
+ /* Set proper policy_cpu */
+ unlock_policy_rwsem_write(cpu);
+ per_cpu(policy_cpu, cpu) = managed_policy->cpu;
+
+ if (lock_policy_rwsem_write(cpu) < 0)
+ goto err_out_driver_exit;
+
spin_lock_irqsave(&cpufreq_driver_lock, flags);
managed_policy->cpus = policy->cpus;
cpufreq_cpu_data[cpu] = managed_policy;
&managed_policy->kobj,
"cpufreq");
if (ret) {
- mutex_unlock(&policy->lock);
+ unlock_policy_rwsem_write(cpu);
goto err_out_driver_exit;
}
cpufreq_debug_enable_ratelimit();
- mutex_unlock(&policy->lock);
ret = 0;
+ unlock_policy_rwsem_write(cpu);
goto err_out_driver_exit; /* call driver->exit() */
}
}
ret = kobject_register(&policy->kobj);
if (ret) {
- mutex_unlock(&policy->lock);
+ unlock_policy_rwsem_write(cpu);
goto err_out_driver_exit;
}
/* set up files for this cpu device */
sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
spin_lock_irqsave(&cpufreq_driver_lock, flags);
- for_each_cpu_mask(j, policy->cpus)
+ for_each_cpu_mask(j, policy->cpus) {
cpufreq_cpu_data[j] = policy;
+ per_cpu(policy_cpu, j) = policy->cpu;
+ }
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
/* symlink affected CPUs */
ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
"cpufreq");
if (ret) {
- mutex_unlock(&policy->lock);
+ unlock_policy_rwsem_write(cpu);
goto err_out_unregister;
}
}
policy->governor = NULL; /* to assure that the starting sequence is
* run in cpufreq_set_policy */
- mutex_unlock(&policy->lock);
+ unlock_policy_rwsem_write(cpu);
/* set default policy */
ret = cpufreq_set_policy(&new_policy);
/**
- * cpufreq_remove_dev - remove a CPU device
+ * __cpufreq_remove_dev - remove a CPU device
*
* Removes the cpufreq interface for a CPU device.
+ * Caller should already have policy_rwsem in write mode for this CPU.
+ * This routine frees the rwsem before returning.
*/
-static int cpufreq_remove_dev (struct sys_device * sys_dev)
+static int __cpufreq_remove_dev (struct sys_device * sys_dev)
{
unsigned int cpu = sys_dev->id;
unsigned long flags;
if (!data) {
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
cpufreq_debug_enable_ratelimit();
+ unlock_policy_rwsem_write(cpu);
return -EINVAL;
}
cpufreq_cpu_data[cpu] = NULL;
sysfs_remove_link(&sys_dev->kobj, "cpufreq");
cpufreq_cpu_put(data);
cpufreq_debug_enable_ratelimit();
+ unlock_policy_rwsem_write(cpu);
return 0;
}
#endif
if (!kobject_get(&data->kobj)) {
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
cpufreq_debug_enable_ratelimit();
+ unlock_policy_rwsem_write(cpu);
return -EFAULT;
}
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif
- mutex_lock(&data->lock);
if (cpufreq_driver->target)
__cpufreq_governor(data, CPUFREQ_GOV_STOP);
- mutex_unlock(&data->lock);
+
+ unlock_policy_rwsem_write(cpu);
kobject_unregister(&data->kobj);
}
+static int cpufreq_remove_dev (struct sys_device * sys_dev)
+{
+ unsigned int cpu = sys_dev->id;
+ int retval;
+ if (unlikely(lock_policy_rwsem_write(cpu)))
+ BUG();
+
+ retval = __cpufreq_remove_dev(sys_dev);
+ return retval;
+}
+
+
static void handle_update(struct work_struct *work)
{
struct cpufreq_policy *policy =
unsigned int ret_freq = 0;
if (policy) {
- mutex_lock(&policy->lock);
+ if (unlikely(lock_policy_rwsem_read(cpu)))
+ return ret_freq;
+
ret_freq = policy->cur;
- mutex_unlock(&policy->lock);
+
+ unlock_policy_rwsem_read(cpu);
cpufreq_cpu_put(policy);
}
EXPORT_SYMBOL(cpufreq_quick_get);
-/**
- * cpufreq_get - get the current CPU frequency (in kHz)
- * @cpu: CPU number
- *
- * Get the CPU current (static) CPU frequency
- */
-unsigned int cpufreq_get(unsigned int cpu)
+static unsigned int __cpufreq_get(unsigned int cpu)
{
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ struct cpufreq_policy *policy = cpufreq_cpu_data[cpu];
unsigned int ret_freq = 0;
- if (!policy)
- return 0;
-
if (!cpufreq_driver->get)
- goto out;
-
- mutex_lock(&policy->lock);
+ return (ret_freq);
ret_freq = cpufreq_driver->get(cpu);
}
}
- mutex_unlock(&policy->lock);
+ return (ret_freq);
+}
-out:
- cpufreq_cpu_put(policy);
+/**
+ * cpufreq_get - get the current CPU frequency (in kHz)
+ * @cpu: CPU number
+ *
+ * Get the CPU current (static) CPU frequency
+ */
+unsigned int cpufreq_get(unsigned int cpu)
+{
+ unsigned int ret_freq = 0;
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+
+ if (!policy)
+ goto out;
+
+ if (unlikely(lock_policy_rwsem_read(cpu)))
+ goto out_policy;
+
+ ret_freq = __cpufreq_get(cpu);
+
+ unlock_policy_rwsem_read(cpu);
+out_policy:
+ cpufreq_cpu_put(policy);
+out:
return (ret_freq);
}
EXPORT_SYMBOL(cpufreq_get);
if (!policy)
return -EINVAL;
- mutex_lock(&policy->lock);
+ if (unlikely(lock_policy_rwsem_write(policy->cpu)))
+ return -EINVAL;
ret = __cpufreq_driver_target(policy, target_freq, relation);
- mutex_unlock(&policy->lock);
+ unlock_policy_rwsem_write(policy->cpu);
cpufreq_cpu_put(policy);
return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);
-int cpufreq_driver_getavg(struct cpufreq_policy *policy)
+int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
{
int ret = 0;
if (!policy)
return -EINVAL;
- mutex_lock(&policy->lock);
-
if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
ret = cpufreq_driver->getavg(policy->cpu);
- mutex_unlock(&policy->lock);
-
cpufreq_cpu_put(policy);
return ret;
}
-EXPORT_SYMBOL_GPL(cpufreq_driver_getavg);
+EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
/*
* when "event" is CPUFREQ_GOV_LIMITS
if (!cpu_policy)
return -EINVAL;
- mutex_lock(&cpu_policy->lock);
memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
- mutex_unlock(&cpu_policy->lock);
cpufreq_cpu_put(cpu_policy);
return 0;
if (!data)
return -EINVAL;
- /* lock this CPU */
- mutex_lock(&data->lock);
+ if (unlikely(lock_policy_rwsem_write(policy->cpu)))
+ return -EINVAL;
+
ret = __cpufreq_set_policy(data, policy);
data->user_policy.min = data->min;
data->user_policy.policy = data->policy;
data->user_policy.governor = data->governor;
- mutex_unlock(&data->lock);
+ unlock_policy_rwsem_write(policy->cpu);
cpufreq_cpu_put(data);
if (!data)
return -ENODEV;
- mutex_lock(&data->lock);
+ if (unlikely(lock_policy_rwsem_write(cpu)))
+ return -EINVAL;
dprintk("updating policy for CPU %u\n", cpu);
memcpy(&policy, data, sizeof(struct cpufreq_policy));
ret = __cpufreq_set_policy(data, &policy);
- mutex_unlock(&data->lock);
+ unlock_policy_rwsem_write(cpu);
+
cpufreq_cpu_put(data);
return ret;
}
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
- struct cpufreq_policy *policy;
struct sys_device *sys_dev;
+ struct cpufreq_policy *policy;
sys_dev = get_cpu_sysdev(cpu);
-
if (sys_dev) {
switch (action) {
case CPU_ONLINE:
cpufreq_add_dev(sys_dev);
break;
case CPU_DOWN_PREPARE:
- /*
- * We attempt to put this cpu in lowest frequency
- * possible before going down. This will permit
- * hardware-managed P-State to switch other related
- * threads to min or higher speeds if possible.
- */
+ if (unlikely(lock_policy_rwsem_write(cpu)))
+ BUG();
+
policy = cpufreq_cpu_data[cpu];
if (policy) {
- cpufreq_driver_target(policy, policy->min,
+ __cpufreq_driver_target(policy, policy->min,
CPUFREQ_RELATION_H);
}
+ __cpufreq_remove_dev(sys_dev);
break;
- case CPU_DEAD:
- cpufreq_remove_dev(sys_dev);
+ case CPU_DOWN_FAILED:
+ cpufreq_add_dev(sys_dev);
break;
}
}
return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
+
+static int __init cpufreq_core_init(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ per_cpu(policy_cpu, cpu) = -1;
+ init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
+ }
+ return 0;
+}
+
+core_initcall(cpufreq_core_init);