};
struct ipc_namespace {
- struct kref kref;
+ atomic_t count;
struct ipc_ids ids[3];
int sem_ctls[4];
extern struct ipc_namespace init_ipc_ns;
extern atomic_t nr_ipc_ns;
+extern spinlock_t mq_lock;
#if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
#define INIT_IPC_NS(ns) .ns = &init_ipc_ns,
#else
#endif /* CONFIG_POSIX_MQUEUE || CONFIG_SYSVIPC */
#ifdef CONFIG_POSIX_MQUEUE
-extern void mq_init_ns(struct ipc_namespace *ns);
+extern int mq_init_ns(struct ipc_namespace *ns);
/* default values */
#define DFLT_QUEUESMAX 256 /* max number of message queues */
#define DFLT_MSGMAX 10 /* max number of messages in each queue */
#define HARD_MSGMAX (131072/sizeof(void *))
#define DFLT_MSGSIZEMAX 8192 /* max message size */
#else
-#define mq_init_ns(ns) ((void) 0)
+static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
#endif
#if defined(CONFIG_IPC_NS)
-extern void free_ipc_ns(struct kref *kref);
+extern void free_ipc_ns(struct ipc_namespace *ns);
extern struct ipc_namespace *copy_ipcs(unsigned long flags,
struct ipc_namespace *ns);
extern void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
{
if (ns)
- kref_get(&ns->kref);
+ atomic_inc(&ns->count);
return ns;
}
-static inline void put_ipc_ns(struct ipc_namespace *ns)
-{
- kref_put(&ns->kref, free_ipc_ns);
-}
+extern void put_ipc_ns(struct ipc_namespace *ns);
#else
static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
struct ipc_namespace *ns)
static struct super_operations mqueue_super_ops;
static void remove_notification(struct mqueue_inode_info *info);
-static spinlock_t mq_lock;
static struct kmem_cache *mqueue_inode_cachep;
static struct ctl_table_header * mq_sysctl_table;
return container_of(inode, struct mqueue_inode_info, vfs_inode);
}
-void mq_init_ns(struct ipc_namespace *ns)
+/*
+ * This routine should be called with the mq_lock held.
+ */
+static inline struct ipc_namespace *__get_ns_from_inode(struct inode *inode)
{
- ns->mq_queues_count = 0;
- ns->mq_queues_max = DFLT_QUEUESMAX;
- ns->mq_msg_max = DFLT_MSGMAX;
- ns->mq_msgsize_max = DFLT_MSGSIZEMAX;
- ns->mq_mnt = mntget(init_ipc_ns.mq_mnt);
+ return get_ipc_ns(inode->i_sb->s_fs_info);
}
-void mq_exit_ns(struct ipc_namespace *ns)
+static struct ipc_namespace *get_ns_from_inode(struct inode *inode)
{
- /* will need to clear out ns->mq_mnt->mnt_sb->s_fs_info here */
- mntput(ns->mq_mnt);
+ struct ipc_namespace *ns;
+
+ spin_lock(&mq_lock);
+ ns = __get_ns_from_inode(inode);
+ spin_unlock(&mq_lock);
+ return ns;
}
-static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
- struct mq_attr *attr)
+static struct inode *mqueue_get_inode(struct super_block *sb,
+ struct ipc_namespace *ipc_ns, int mode,
+ struct mq_attr *attr)
{
struct user_struct *u = current_user();
struct inode *inode;
- struct ipc_namespace *ipc_ns = &init_ipc_ns;
inode = new_inode(sb);
if (inode) {
static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
+ struct ipc_namespace *ns = data;
+ int error = 0;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = MQUEUE_MAGIC;
sb->s_op = &mqueue_super_ops;
- inode = mqueue_get_inode(sb, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL);
- if (!inode)
- return -ENOMEM;
+ inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO,
+ NULL);
+ if (!inode) {
+ error = -ENOMEM;
+ goto out;
+ }
sb->s_root = d_alloc_root(inode);
if (!sb->s_root) {
iput(inode);
- return -ENOMEM;
+ error = -ENOMEM;
}
- return 0;
+out:
+ return error;
}
static int mqueue_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *data, struct vfsmount *mnt)
{
- return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt);
+ if (!(flags & MS_KERNMOUNT))
+ data = current->nsproxy->ipc_ns;
+ return get_sb_ns(fs_type, flags, data, mqueue_fill_super, mnt);
}
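
get_sb_ns() is not defined in this patch; it comes from a companion change to fs/super.c. A minimal sketch of what it is expected to do, assuming it mirrors get_sb_single() but keys the superblock on the namespace pointer via sb->s_fs_info (the ns_test_super/ns_set_super names are taken on faith from that companion patch):

	/*
	 * Superblocks are matched on s_fs_info, i.e. on the ipc namespace,
	 * so each namespace gets (and reuses) its own mqueuefs superblock.
	 */
	static int ns_test_super(struct super_block *sb, void *data)
	{
		return sb->s_fs_info == data;
	}

	static int ns_set_super(struct super_block *sb, void *data)
	{
		sb->s_fs_info = data;
		return set_anon_super(sb, NULL);
	}

	int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
		      int (*fill_super)(struct super_block *, void *, int),
		      struct vfsmount *mnt)
	{
		struct super_block *sb;

		sb = sget(fs_type, ns_test_super, ns_set_super, data);
		if (IS_ERR(sb))
			return PTR_ERR(sb);
		if (!sb->s_root) {
			int err;
			sb->s_flags = flags;
			err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
			if (err) {
				up_write(&sb->s_umount);
				deactivate_super(sb);
				return err;
			}
			sb->s_flags |= MS_ACTIVE;
		}
		simple_set_mnt(mnt, sb);
		return 0;
	}

This is why a mount of mqueuefs from another ipc namespace gets a fresh superblock instead of sharing init_ipc_ns's, while a second mount from the same namespace reuses the existing one.
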
static void init_once(void *foo)
struct user_struct *user;
unsigned long mq_bytes;
int i;
- struct ipc_namespace *ipc_ns = &init_ipc_ns;
+ struct ipc_namespace *ipc_ns;
if (S_ISDIR(inode->i_mode)) {
clear_inode(inode);
return;
}
+ ipc_ns = get_ns_from_inode(inode);
info = MQUEUE_I(inode);
spin_lock(&info->lock);
for (i = 0; i < info->attr.mq_curmsgs; i++)
if (user) {
spin_lock(&mq_lock);
user->mq_bytes -= mq_bytes;
- ipc_ns->mq_queues_count--;
+ /*
+ * get_ns_from_inode() ensures that the
+ * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
+ * to which we now hold a reference, or it is NULL.
+ * We can't put it here under mq_lock, though.
+ */
+ if (ipc_ns)
+ ipc_ns->mq_queues_count--;
spin_unlock(&mq_lock);
free_uid(user);
}
+ if (ipc_ns)
+ put_ipc_ns(ipc_ns);
}
static int mqueue_create(struct inode *dir, struct dentry *dentry,
struct inode *inode;
struct mq_attr *attr = dentry->d_fsdata;
int error;
- struct ipc_namespace *ipc_ns = &init_ipc_ns;
+ struct ipc_namespace *ipc_ns;
spin_lock(&mq_lock);
+ ipc_ns = __get_ns_from_inode(dir);
+ if (!ipc_ns) {
+ error = -EACCES;
+ goto out_unlock;
+ }
if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max &&
!capable(CAP_SYS_RESOURCE)) {
error = -ENOSPC;
ipc_ns->mq_queues_count++;
spin_unlock(&mq_lock);
- inode = mqueue_get_inode(dir->i_sb, mode, attr);
+ inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr);
if (!inode) {
error = -ENOMEM;
spin_lock(&mq_lock);
goto out_unlock;
}
+ put_ipc_ns(ipc_ns);
dir->i_size += DIRENT_SIZE;
dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME;
return 0;
out_unlock:
spin_unlock(&mq_lock);
+ if (ipc_ns)
+ put_ipc_ns(ipc_ns);
return error;
}
char *name;
struct mq_attr attr;
int fd, error;
- struct ipc_namespace *ipc_ns = &init_ipc_ns;
+ struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
return -EFAULT;
char *name;
struct dentry *dentry;
struct inode *inode = NULL;
- struct ipc_namespace *ipc_ns = &init_ipc_ns;
+ struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
name = getname(u_name);
if (IS_ERR(name))
.kill_sb = kill_litter_super,
};
+int mq_init_ns(struct ipc_namespace *ns)
+{
+ ns->mq_queues_count = 0;
+ ns->mq_queues_max = DFLT_QUEUESMAX;
+ ns->mq_msg_max = DFLT_MSGMAX;
+ ns->mq_msgsize_max = DFLT_MSGSIZEMAX;
+
+ ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns);
+ if (IS_ERR(ns->mq_mnt)) {
+ int err = PTR_ERR(ns->mq_mnt);
+ ns->mq_mnt = NULL;
+ return err;
+ }
+ return 0;
+}
+
+void mq_clear_sbinfo(struct ipc_namespace *ns)
+{
+ ns->mq_mnt->mnt_sb->s_fs_info = NULL;
+}
+
+void mq_put_mnt(struct ipc_namespace *ns)
+{
+ mntput(ns->mq_mnt);
+}
+
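
kern_mount_data(), used by mq_init_ns() above, is the existing fs/super.c helper (reproduced here for context, not part of this patch). It is kern_mount() plus a data pointer, and the MS_KERNMOUNT flag it passes is what lets mqueue_get_sb() trust the supplied ipc_ns instead of substituting current->nsproxy->ipc_ns:

	/* Existing helper in fs/super.c, shown for context */
	struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
	{
		return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
	}
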
static int msg_max_limit_min = MIN_MSGMAX;
static int msg_max_limit_max = MAX_MSGMAX;
if (error)
goto out_sysctl;
- init_ipc_ns.mq_mnt = kern_mount(&mqueue_fs_type);
+ spin_lock_init(&mq_lock);
+
+ init_ipc_ns.mq_mnt = kern_mount_data(&mqueue_fs_type, &init_ipc_ns);
if (IS_ERR(init_ipc_ns.mq_mnt)) {
error = PTR_ERR(init_ipc_ns.mq_mnt);
goto out_filesystem;
}
- /* internal initialization - not common for vfs */
- spin_lock_init(&mq_lock);
-
return 0;
out_filesystem:
#include <linux/rcupdate.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
#include "util.h"
static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
{
struct ipc_namespace *ns;
+ int err;
ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
if (ns == NULL)
return ERR_PTR(-ENOMEM);
+ atomic_set(&ns->count, 1);
+ err = mq_init_ns(ns);
+ if (err) {
+ kfree(ns);
+ return ERR_PTR(err);
+ }
atomic_inc(&nr_ipc_ns);
sem_init_ns(ns);
msg_init_ns(ns);
shm_init_ns(ns);
- mq_init_ns(ns);
/*
* msgmni has already been computed for the new ipc ns.
ipcns_notify(IPCNS_CREATED);
register_ipcns_notifier(ns);
- kref_init(&ns->kref);
return ns;
}
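
The visible effect of hooking mq_init_ns() into clone_ipc_ns() is that a task unsharing CLONE_NEWIPC starts with an empty, independent queue set. A hedged userspace sketch (the queue name and flow are invented for illustration; build with -lrt, and unshare(CLONE_NEWIPC) requires CAP_SYS_ADMIN):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <mqueue.h>
	#include <sched.h>
	#include <stdio.h>

	int main(void)
	{
		mqd_t q;

		if (unshare(CLONE_NEWIPC) < 0) {
			perror("unshare");
			return 1;
		}
		/* O_EXCL succeeds even if the parent namespace has a "/demo":
		 * sys_mq_open() now resolves names against the unshared
		 * current->nsproxy->ipc_ns, not a single global mount. */
		q = mq_open("/demo", O_CREAT | O_EXCL | O_RDWR, 0600, NULL);
		if (q == (mqd_t)-1) {
			perror("mq_open");
			return 1;
		}
		mq_close(q);
		mq_unlink("/demo");
		return 0;
	}
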
up_write(&ids->rw_mutex);
}
-void free_ipc_ns(struct kref *kref)
+/*
+ * put_ipc_ns - drop a reference to an ipc namespace.
+ * @ns: the namespace to put
+ *
+ * If this is the last task in the namespace exiting, and
+ * it is dropping the refcount to 0, then it can race with
+ * a task in another ipc namespace but in a mount namespace
+ * which has this ipcns's mqueuefs mounted, doing some action
+ * with one of the mqueuefs files. That can raise the refcount.
+ * So dropping the refcount, and raising the refcount when
+ * accessing it through the VFS, are protected with mq_lock.
+ *
+ * (Clearly, a task raising the refcount on its own ipc_ns
+ * needn't take mq_lock since it can't race with the last task
+ * in the ipcns exiting).
+ */
+void put_ipc_ns(struct ipc_namespace *ns)
{
- struct ipc_namespace *ns;
+ if (atomic_dec_and_lock(&ns->count, &mq_lock)) {
+ mq_clear_sbinfo(ns);
+ spin_unlock(&mq_lock);
+ mq_put_mnt(ns);
+ free_ipc_ns(ns);
+ }
+}
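
atomic_dec_and_lock() is what closes the race described in the comment above: mq_lock is taken before the final decrement, so __get_ns_from_inode(), which runs under mq_lock, either sees count > 0 and may safely atomic_inc(), or runs after mq_clear_sbinfo() has already NULLed s_fs_info. Its generic implementation (lib/dec_and_lock.c) is roughly:

	int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
	{
		/* Fast path: decrement unless that would drop the count
		 * to zero (i.e. unless it is currently 1). */
		if (atomic_add_unless(atomic, -1, 1))
			return 0;

		/* Slow path: the count is 1, so take the lock and do the
		 * final decrement under it. */
		spin_lock(lock);
		if (atomic_dec_and_test(atomic))
			return 1;
		spin_unlock(lock);
		return 0;
	}

The count never drops from 1 to 0 outside the lock, which is exactly the guarantee the lock-protected refcount-raising in __get_ns_from_inode() relies on.
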
- ns = container_of(kref, struct ipc_namespace, kref);
+void free_ipc_ns(struct ipc_namespace *ns)
+{
/*
* Unregistering the hotplug notifier at the beginning guarantees
* that the ipc namespace won't be freed while we are inside the
sem_exit_ns(ns);
msg_exit_ns(ns);
shm_exit_ns(ns);
- mq_exit_ns(ns);
kfree(ns);
atomic_dec(&nr_ipc_ns);