*/
static DECLARE_MUTEX(cpuset_sem);
-static struct task_struct *cpuset_sem_owner;
-static int cpuset_sem_depth;
-
-/*
- * The global cpuset semaphore cpuset_sem can be needed by the
- * memory allocator to update a tasks mems_allowed (see the calls
- * to cpuset_update_current_mems_allowed()) or to walk up the
- * cpuset hierarchy to find a mem_exclusive cpuset see the calls
- * to cpuset_excl_nodes_overlap()).
- *
- * But if the memory allocation is being done by cpuset.c code, it
- * usually already holds cpuset_sem. Double tripping on a kernel
- * semaphore deadlocks the current task, and any other task that
- * subsequently tries to obtain the lock.
- *
- * Run all up's and down's on cpuset_sem through the following
- * wrappers, which will detect this nested locking, and avoid
- * deadlocking.
- */
-
-static inline void cpuset_down(struct semaphore *psem)
-{
- if (cpuset_sem_owner != current) {
- down(psem);
- cpuset_sem_owner = current;
- }
- cpuset_sem_depth++;
-}
-
-static inline void cpuset_up(struct semaphore *psem)
-{
- if (--cpuset_sem_depth == 0) {
- cpuset_sem_owner = NULL;
- up(psem);
- }
-}
/*
* A couple of forward declarations required, due to cyclic reference loop:
* Refresh current tasks mems_allowed and mems_generation from
* current tasks cpuset. Call with cpuset_sem held.
*
- * This routine is needed to update the per-task mems_allowed
- * data, within the tasks context, when it is trying to allocate
- * memory (in various mm/mempolicy.c routines) and notices
- * that some other task has been modifying its cpuset.
+ * Be sure to call refresh_mems() on any cpuset operation which
+ * (1) holds cpuset_sem, and (2) might allocate memory.
+ * Call it after obtaining cpuset_sem, before any possible
+ * allocation. Otherwise one risks trying to allocate memory
+ * while the task's cpuset_mems_generation is not the same as
+ * the mems_generation in its cpuset, which would deadlock on
+ * cpuset_sem in cpuset_update_current_mems_allowed().
+ *
+ * Since we hold cpuset_sem, once refresh_mems() is called, the
+ * test (current->cpuset_mems_generation != cs->mems_generation)
+ * in cpuset_update_current_mems_allowed() will remain false,
+ * until we drop cpuset_sem. Anyone else who would change our
+ * cpuset's mems_generation needs to lock cpuset_sem first.
*/
static void refresh_mems(void)
}
buffer[nbytes] = 0; /* nul-terminate */
- cpuset_down(&cpuset_sem);
+ down(&cpuset_sem);
if (is_removed(cs)) {
retval = -ENODEV;
if (retval == 0)
retval = nbytes;
out2:
- cpuset_up(&cpuset_sem);
+ up(&cpuset_sem);
cpuset_release_agent(pathbuf);
out1:
kfree(buffer);
{
cpumask_t mask;
- cpuset_down(&cpuset_sem);
+ down(&cpuset_sem);
mask = cs->cpus_allowed;
- cpuset_up(&cpuset_sem);
+ up(&cpuset_sem);
return cpulist_scnprintf(page, PAGE_SIZE, mask);
}
{
nodemask_t mask;
- cpuset_down(&cpuset_sem);
+ down(&cpuset_sem);
mask = cs->mems_allowed;
- cpuset_up(&cpuset_sem);
+ up(&cpuset_sem);
return nodelist_scnprintf(page, PAGE_SIZE, mask);
}
if (!cs)
return -ENOMEM;
- cpuset_down(&cpuset_sem);
+ down(&cpuset_sem);
+ refresh_mems();
cs->flags = 0;
if (notify_on_release(parent))
set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
* will down() this new directory's i_sem and if we race with
* another mkdir, we might deadlock.
*/
- cpuset_up(&cpuset_sem);
+ up(&cpuset_sem);
err = cpuset_populate_dir(cs->dentry);
/* If err < 0, we have a half-filled directory - oh well ;) */
return 0;
err:
list_del(&cs->sibling);
- cpuset_up(&cpuset_sem);
+ up(&cpuset_sem);
kfree(cs);
return err;
}
/* the vfs holds both inode->i_sem already */
- cpuset_down(&cpuset_sem);
+ down(&cpuset_sem);
+ refresh_mems();
if (atomic_read(&cs->count) > 0) {
- cpuset_up(&cpuset_sem);
+ up(&cpuset_sem);
return -EBUSY;
}
if (!list_empty(&cs->children)) {
- cpuset_up(&cpuset_sem);
+ up(&cpuset_sem);
return -EBUSY;
}
parent = cs->parent;
spin_unlock(&d->d_lock);
cpuset_d_remove_dir(d);
dput(d);
- cpuset_up(&cpuset_sem);
+ up(&cpuset_sem);
cpuset_release_agent(pathbuf);
return 0;
}
if (notify_on_release(cs)) {
char *pathbuf = NULL;
- cpuset_down(&cpuset_sem);
+ down(&cpuset_sem);
if (atomic_dec_and_test(&cs->count))
check_for_release(cs, &pathbuf);
- cpuset_up(&cpuset_sem);
+ up(&cpuset_sem);
cpuset_release_agent(pathbuf);
} else {
atomic_dec(&cs->count);
{
cpumask_t mask;
- cpuset_down(&cpuset_sem);
+ down(&cpuset_sem);
task_lock((struct task_struct *)tsk);
guarantee_online_cpus(tsk->cpuset, &mask);
task_unlock((struct task_struct *)tsk);
- cpuset_up(&cpuset_sem);
+ up(&cpuset_sem);
return mask;
}
if (!cs)
return; /* task is exiting */
if (current->cpuset_mems_generation != cs->mems_generation) {
- cpuset_down(&cpuset_sem);
+ down(&cpuset_sem);
refresh_mems();
- cpuset_up(&cpuset_sem);
+ up(&cpuset_sem);
}
}
return 0;
/* Not hardwall and node outside mems_allowed: scan up cpusets */
- cpuset_down(&cpuset_sem);
+ down(&cpuset_sem);
cs = current->cpuset;
if (!cs)
goto done; /* current task exiting */
cs = nearest_exclusive_ancestor(cs);
allowed = node_isset(node, cs->mems_allowed);
done:
- cpuset_up(&cpuset_sem);
+ up(&cpuset_sem);
return allowed;
}
const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */
int overlap = 0; /* do cpusets overlap? */
- cpuset_down(&cpuset_sem);
+ down(&cpuset_sem);
cs1 = current->cpuset;
if (!cs1)
goto done; /* current task exiting */
cs2 = nearest_exclusive_ancestor(cs2);
overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
done:
- cpuset_up(&cpuset_sem);
+ up(&cpuset_sem);
return overlap;
}
return -ENOMEM;
tsk = m->private;
- cpuset_down(&cpuset_sem);
+ down(&cpuset_sem);
task_lock(tsk);
cs = tsk->cpuset;
task_unlock(tsk);
seq_puts(m, buf);
seq_putc(m, '\n');
out:
- cpuset_up(&cpuset_sem);
+ up(&cpuset_sem);
kfree(buf);
return retval;
}