cgroup: introduce cgroup namespaces

author Aditya Kali <adityakali@google.com>

Fri, 29 Jan 2016 08:54:06 +0000 (02:54 -0600)

committer Tejun Heo <tj@kernel.org>

Tue, 16 Feb 2016 18:04:58 +0000 (13:04 -0500)
author Aditya Kali <adityakali@google.com>
Fri, 29 Jan 2016 08:54:06 +0000 (02:54 -0600)
committer Tejun Heo <tj@kernel.org>
Tue, 16 Feb 2016 18:04:58 +0000 (13:04 -0500)
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c

index 276f12431dbfccdeb4add56a226c07d6062af38b..72cb26f85d58dce643936b7c447cb9b56b9bd7d3 100644 (file)
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -28,6 +28,9 @@ static const struct proc_ns_operations *ns_entries[] = {
         &userns_operations,
  #endif
         &mntns_operations,
+#ifdef CONFIG_CGROUPS
+       &cgroupns_operations,
+#endif
  };
  
  static const char *proc_ns_get_link(struct dentry *dentry,
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h

index 2162dca88dc0881623272fd56017a84ae0f663de..a20320c666fd669582066c1bca0a0b1e51f00e0b 100644 (file)
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -17,6 +17,11 @@
  #include <linux/seq_file.h>
  #include <linux/kernfs.h>
  #include <linux/jump_label.h>
+#include <linux/nsproxy.h>
+#include <linux/types.h>
+#include <linux/ns_common.h>
+#include <linux/nsproxy.h>
+#include <linux/user_namespace.h>
  
  #include <linux/cgroup-defs.h>
  
@@ -611,4 +616,48 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {}
  
  #endif /* CONFIG_CGROUP_DATA */
  
+/*
+ * struct cgroup_namespace - a task's view of the cgroup hierarchies
+ *
+ * Reference counted through get_cgroup_ns()/put_cgroup_ns(); the put that
+ * drops the count to zero calls free_cgroup_ns().
+ */
+struct cgroup_namespace {
+       /* reference count, see get_cgroup_ns()/put_cgroup_ns() */
+       atomic_t                count;
+       /* common namespace part: carries the ops table and the inum */
+       struct ns_common        ns;
+       /* user namespace handed to copy_cgroup_ns(); released in free_cgroup_ns() */
+       struct user_namespace   *user_ns;
+       /*
+        * css_set of the task that created the namespace; cgroup_path_ns()
+        * uses it to pick the cgroup that paths are computed from.
+        */
+       struct css_set          *root_cset;
+};
+
+extern struct cgroup_namespace init_cgroup_ns;
+
+#ifdef CONFIG_CGROUPS
+
+void free_cgroup_ns(struct cgroup_namespace *ns);
+
+struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
+                                       struct user_namespace *user_ns,
+                                       struct cgroup_namespace *old_ns);
+
+char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+                    struct cgroup_namespace *ns);
+
+#else /* !CONFIG_CGROUPS */
+
+/* !CONFIG_CGROUPS stub: does nothing with @ns. */
+static inline void free_cgroup_ns(struct cgroup_namespace *ns)
+{
+}
+/*
+ * !CONFIG_CGROUPS stub: no namespace is ever created. @old_ns is handed
+ * back unchanged regardless of @flags, and no reference is taken on it.
+ */
+static inline struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
+                                       struct user_namespace *user_ns,
+                                       struct cgroup_namespace *old_ns)
+{
+       return old_ns;
+}
+
+#endif /* !CONFIG_CGROUPS */
+
+/* Take one reference on @ns. A NULL @ns is accepted and ignored. */
+static inline void get_cgroup_ns(struct cgroup_namespace *ns)
+{
+       if (!ns)
+               return;
+
+       atomic_inc(&ns->count);
+}
+
+/*
+ * Drop one reference on @ns and hand the namespace to free_cgroup_ns()
+ * when the count reaches zero. A NULL @ns is accepted and ignored.
+ */
+static inline void put_cgroup_ns(struct cgroup_namespace *ns)
+{
+       if (!ns)
+               return;
+
+       if (atomic_dec_and_test(&ns->count))
+               free_cgroup_ns(ns);
+}
+
  #endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h

index 35fa08fd77399a3a95ad8d4183f16990b8e6292a..ac0d65bef5d0860d7ff8944a097a122fb222ddc0 100644 (file)
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -8,6 +8,7 @@ struct mnt_namespace;
  struct uts_namespace;
  struct ipc_namespace;
  struct pid_namespace;
+struct cgroup_namespace;
  struct fs_struct;
  
  /*
@@ -33,6 +34,7 @@ struct nsproxy {
         struct mnt_namespace *mnt_ns;
         struct pid_namespace *pid_ns_for_children;
         struct net           *net_ns;
+       struct cgroup_namespace *cgroup_ns;
  };
  extern struct nsproxy init_nsproxy;
  
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h

index 42dfc615dbf8381a2057cb9956cec957e9ca5b1b..de0e7719d4c5594b6dd536aa49fad22c24abf3af 100644 (file)
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -9,6 +9,8 @@
  struct pid_namespace;
  struct nsproxy;
  struct path;
+struct task_struct;
+struct inode;
  
  struct proc_ns_operations {
         const char *name;
@@ -24,6 +26,7 @@ extern const struct proc_ns_operations ipcns_operations;
  extern const struct proc_ns_operations pidns_operations;
  extern const struct proc_ns_operations userns_operations;
  extern const struct proc_ns_operations mntns_operations;
+extern const struct proc_ns_operations cgroupns_operations;
  
  /*
   * We always define these enumerators
@@ -34,6 +37,7 @@ enum {
         PROC_UTS_INIT_INO       = 0xEFFFFFFEU,
         PROC_USER_INIT_INO      = 0xEFFFFFFDU,
         PROC_PID_INIT_INO       = 0xEFFFFFFCU,
+       PROC_CGROUP_INIT_INO    = 0xEFFFFFFBU,
  };
  
  #ifdef CONFIG_PROC_FS
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index 7ad61915967fc00c2875c5d6184c646a9ae32405..b001c5d36becf0f35cf0343eccdb82dcad16f1ab 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -59,6 +59,9 @@
  #include <linux/delay.h>
  #include <linux/atomic.h>
  #include <linux/cpuset.h>
+#include <linux/proc_ns.h>
+#include <linux/nsproxy.h>
+#include <linux/proc_ns.h>
  #include <net/sock.h>
  
  /*
@@ -212,6 +215,15 @@ static unsigned long have_fork_callback __read_mostly;
  static unsigned long have_exit_callback __read_mostly;
  static unsigned long have_free_callback __read_mostly;
  
+/*
+ * cgroup namespace for init task.
+ *
+ * Statically allocated, rooted at init_css_set and tied to init_user_ns.
+ * NOTE(review): the count starts at 2 rather than 1 - presumably one
+ * reference for init_nsproxy plus one that is never dropped, so that
+ * free_cgroup_ns() can never run on this static object; confirm.
+ */
+struct cgroup_namespace init_cgroup_ns = {
+       .count          = { .counter = 2, },
+       .user_ns        = &init_user_ns,
+       .ns.ops         = &cgroupns_operations,
+       .ns.inum        = PROC_CGROUP_INIT_INO,
+       .root_cset      = &init_css_set,
+};
+
  /* Ditto for the can_fork callback. */
  static unsigned long have_canfork_callback __read_mostly;
  
@@ -2177,6 +2189,35 @@ static struct file_system_type cgroup2_fs_type = {
         .kill_sb = cgroup_kill_sb,
  };
  
+/*
+ * Write the path of @cgrp into @buf, computed from the cgroup that
+ * @ns->root_cset occupies in @cgrp's hierarchy.
+ *
+ * Returns @buf on success, or NULL if kernfs_path_from_node() fails or the
+ * result does not fit in @buflen bytes.
+ *
+ * cgroup_path_ns() and cgroup_release_agent() call this with css_set_lock
+ * held. NOTE(review): presumably a requirement of cset_cgroup_from_root();
+ * confirm against its definition.
+ */
+static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
+                                  struct cgroup_namespace *ns)
+{
+       struct cgroup *ns_root;
+       int len;
+
+       ns_root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
+       len = kernfs_path_from_node(cgrp->kn, ns_root->kn, buf, buflen);
+
+       return (len >= 0 && len < buflen) ? buf : NULL;
+}
+
+/**
+ * cgroup_path_ns - path of a cgroup as seen from a cgroup namespace
+ * @cgrp: cgroup whose path is wanted
+ * @buf: buffer the path is written to
+ * @buflen: size of @buf in bytes
+ * @ns: namespace the path is computed for
+ *
+ * Locking wrapper around cgroup_path_ns_locked(): takes cgroup_mutex and
+ * then css_set_lock around the lookup, releasing them in reverse order.
+ *
+ * Returns @buf on success, NULL if the path could not be produced.
+ */
+char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+                    struct cgroup_namespace *ns)
+{
+       char *path;
+
+       mutex_lock(&cgroup_mutex);
+       spin_lock_bh(&css_set_lock);
+       path = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
+       spin_unlock_bh(&css_set_lock);
+       mutex_unlock(&cgroup_mutex);
+
+       return path;
+}
+EXPORT_SYMBOL_GPL(cgroup_path_ns);
+
  /**
   * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
   * @task: target task
@@ -2204,7 +2245,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
  
         if (root) {
                 cgrp = task_cgroup_from_root(task, root);
-               path = cgroup_path(cgrp, buf, buflen);
+               path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
         } else {
                 /* if no hierarchy exists, everyone is in "/" */
                 if (strlcpy(buf, "/", buflen) < buflen)
@@ -5297,6 +5338,8 @@ int __init cgroup_init(void)
         BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
         BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
  
+       get_user_ns(init_cgroup_ns.user_ns);
+
         mutex_lock(&cgroup_mutex);
  
         /* Add init_css_set to the hash table */
@@ -5438,7 +5481,8 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
                  * " (deleted)" is appended to the cgroup path.
                  */
                 if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
-                       path = cgroup_path(cgrp, buf, PATH_MAX);
+                       path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
+                                               current->nsproxy->cgroup_ns);
                         if (!path) {
                                 retval = -ENAMETOOLONG;
                                 goto out_unlock;
@@ -5720,7 +5764,9 @@ static void cgroup_release_agent(struct work_struct *work)
         if (!pathbuf || !agentbuf)
                 goto out;
  
-       path = cgroup_path(cgrp, pathbuf, PATH_MAX);
+       spin_lock_bh(&css_set_lock);
+       path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
+       spin_unlock_bh(&css_set_lock);
         if (!path)
                 goto out;
  
@@ -5931,6 +5977,127 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
  
  #endif /* CONFIG_SOCK_CGROUP_DATA */
  
+/* cgroup namespaces */
+
+/*
+ * Allocate a zeroed cgroup namespace with an inum, its ops table and a
+ * reference count of one. user_ns and root_cset are left NULL for the
+ * caller to fill in.
+ *
+ * Failure is reported as an ERR_PTR() value, never as NULL: -ENOMEM when
+ * the allocation fails, otherwise the error returned by ns_alloc_inum().
+ */
+static struct cgroup_namespace *alloc_cgroup_ns(void)
+{
+       struct cgroup_namespace *cg_ns;
+       int err;
+
+       cg_ns = kzalloc(sizeof(*cg_ns), GFP_KERNEL);
+       if (!cg_ns)
+               return ERR_PTR(-ENOMEM);
+
+       err = ns_alloc_inum(&cg_ns->ns);
+       if (err)
+               goto free_ns;
+
+       cg_ns->ns.ops = &cgroupns_operations;
+       atomic_set(&cg_ns->count, 1);
+       return cg_ns;
+
+free_ns:
+       kfree(cg_ns);
+       return ERR_PTR(err);
+}
+
+/*
+ * free_cgroup_ns - tear down a cgroup namespace
+ * @ns: namespace to destroy; it is dereferenced unconditionally
+ *
+ * Called by put_cgroup_ns() when the reference count reaches zero. Drops
+ * the references held on the root css_set and on the user namespace,
+ * returns the inum and frees the structure with kfree().
+ *
+ * NOTE(review): because of the kfree(), this must never run on the
+ * statically allocated init_cgroup_ns.
+ * NOTE(review): exported with EXPORT_SYMBOL while cgroup_path_ns() uses
+ * EXPORT_SYMBOL_GPL - confirm the difference is intended.
+ */
+void free_cgroup_ns(struct cgroup_namespace *ns)
+{
+       put_css_set(ns->root_cset);
+       put_user_ns(ns->user_ns);
+       ns_free_inum(&ns->ns);
+       kfree(ns);
+}
+EXPORT_SYMBOL(free_cgroup_ns);
+
+/**
+ * copy_cgroup_ns - share or create the cgroup namespace for a new nsproxy
+ * @flags: clone/unshare flags; only CLONE_NEWCGROUP is examined here
+ * @user_ns: user namespace recorded in (and checked for) a new namespace
+ * @old_ns: cgroup namespace of the calling task, must not be NULL
+ *
+ * Without CLONE_NEWCGROUP a reference is taken on @old_ns and it is
+ * returned. With CLONE_NEWCGROUP a new namespace is created whose root is
+ * the css_set the current task belongs to at the time of the call.
+ *
+ * Returns the namespace with one reference owned by the caller, or an
+ * ERR_PTR(): -EPERM if the caller lacks CAP_SYS_ADMIN in @user_ns, or the
+ * error reported by alloc_cgroup_ns().
+ */
+struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
+                                       struct user_namespace *user_ns,
+                                       struct cgroup_namespace *old_ns)
+{
+       struct cgroup_namespace *new_ns;
+       struct css_set *cset;
+
+       BUG_ON(!old_ns);
+
+       if (!(flags & CLONE_NEWCGROUP)) {
+               get_cgroup_ns(old_ns);
+               return old_ns;
+       }
+
+       /* Allow only sysadmin to create cgroup namespace. */
+       if (!ns_capable(user_ns, CAP_SYS_ADMIN))
+               return ERR_PTR(-EPERM);
+
+       /* Pin the css_set that becomes the root of the new namespace. */
+       mutex_lock(&cgroup_mutex);
+       spin_lock_bh(&css_set_lock);
+
+       cset = task_css_set(current);
+       get_css_set(cset);
+
+       spin_unlock_bh(&css_set_lock);
+       mutex_unlock(&cgroup_mutex);
+
+       /*
+        * alloc_cgroup_ns() reports failure through ERR_PTR(), never NULL.
+        * Testing for NULL would let an error pointer be dereferenced below,
+        * and kfree() must not be called on it either.
+        */
+       new_ns = alloc_cgroup_ns();
+       if (IS_ERR(new_ns)) {
+               put_css_set(cset);
+               return new_ns;
+       }
+
+       new_ns->user_ns = get_user_ns(user_ns);
+       new_ns->root_cset = cset;
+
+       return new_ns;
+}
+
+/* Map an embedded ns_common back to the cgroup_namespace that contains it. */
+static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
+{
+       struct cgroup_namespace *cg_ns;
+
+       cg_ns = container_of(ns, struct cgroup_namespace, ns);
+       return cg_ns;
+}
+
+/*
+ * ->install() hook of cgroupns_operations.
+ *
+ * Joining an existing cgroup namespace is not implemented: both arguments
+ * are ignored, a message is logged and -EINVAL is returned.
+ */
+static int cgroupns_install(struct nsproxy *nsproxy, void *ns)
+{
+       /* printk messages must be newline terminated to form a full line */
+       pr_info("setns not supported for cgroup namespace\n");
+       return -EINVAL;
+}
+
+/*
+ * ->get() hook of cgroupns_operations.
+ *
+ * Looks up @task's cgroup namespace under task_lock() and takes a
+ * reference on it. Returns the embedded ns_common, or NULL when the task
+ * has no nsproxy or no cgroup namespace.
+ */
+static struct ns_common *cgroupns_get(struct task_struct *task)
+{
+       struct ns_common *found = NULL;
+       struct cgroup_namespace *cg_ns;
+       struct nsproxy *nsp;
+
+       task_lock(task);
+       nsp = task->nsproxy;
+       if (nsp) {
+               cg_ns = nsp->cgroup_ns;
+               get_cgroup_ns(cg_ns);
+               if (cg_ns)
+                       found = &cg_ns->ns;
+       }
+       task_unlock(task);
+
+       return found;
+}
+
+/* ->put() hook of cgroupns_operations: drop one reference on the namespace. */
+static void cgroupns_put(struct ns_common *ns)
+{
+       struct cgroup_namespace *cg_ns = to_cg_ns(ns);
+
+       put_cgroup_ns(cg_ns);
+}
+
+/*
+ * proc namespace operations for the cgroup namespace. This table is listed
+ * in ns_entries[] in fs/proc/namespaces.c and installed as ns.ops of every
+ * cgroup namespace. ->install always fails with -EINVAL for now.
+ */
+const struct proc_ns_operations cgroupns_operations = {
+       .name           = "cgroup",
+       .type           = CLONE_NEWCGROUP,
+       .get            = cgroupns_get,
+       .put            = cgroupns_put,
+       .install        = cgroupns_install,
+};
+
+/*
+ * Registered as a subsys initcall but currently performs no work and
+ * always returns 0.
+ * NOTE(review): looks like a placeholder - confirm it is still needed.
+ */
+static __init int cgroup_namespaces_init(void)
+{
+       return 0;
+}
+subsys_initcall(cgroup_namespaces_init);
+
  #ifdef CONFIG_CGROUP_DEBUG
  static struct cgroup_subsys_state *
  debug_css_alloc(struct cgroup_subsys_state *parent_css)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c

index 41989ab4db571cbf93d1a12738bc9afc3411e019..d393125b228ca218ce15153f904cabe9c55f7ea5 100644 (file)
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2714,10 +2714,10 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
                 goto out;
  
         retval = -ENAMETOOLONG;
-       rcu_read_lock();
-       css = task_css(tsk, cpuset_cgrp_id);
-       p = cgroup_path(css->cgroup, buf, PATH_MAX);
-       rcu_read_unlock();
+       css = task_get_css(tsk, cpuset_cgrp_id);
+       p = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
+                          current->nsproxy->cgroup_ns);
+       css_put(css);
         if (!p)
                 goto out_free;
         seq_puts(m, p);
diff --git a/kernel/fork.c b/kernel/fork.c

index 2e391c754ae730bd2d8520c2ab497c403220c6e3..6611a6267949b11a2fc6ad3912d0467001ff6410 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1884,7 +1884,7 @@ static int check_unshare_flags(unsigned long unshare_flags)
         if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
                                 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
                                 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
-                               CLONE_NEWUSER|CLONE_NEWPID))
+                               CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP))
                 return -EINVAL;
         /*
          * Not implemented, but pretend it works if there is nothing
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c

index 49746c81ad8d28abb3ac85ad82ee9dbc4ce69e81..782102e59eed5b4b5379126b865fb9c795892cdd 100644 (file)
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -25,6 +25,7 @@
  #include <linux/proc_ns.h>
  #include <linux/file.h>
  #include <linux/syscalls.h>
+#include <linux/cgroup.h>
  
  static struct kmem_cache *nsproxy_cachep;
  
@@ -39,6 +40,9 @@ struct nsproxy init_nsproxy = {
  #ifdef CONFIG_NET
         .net_ns                 = &init_net,
  #endif
+#ifdef CONFIG_CGROUPS
+       .cgroup_ns              = &init_cgroup_ns,
+#endif
  };
  
  static inline struct nsproxy *create_nsproxy(void)
@@ -92,6 +96,13 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
                 goto out_pid;
         }
  
+       new_nsp->cgroup_ns = copy_cgroup_ns(flags, user_ns,
+                                           tsk->nsproxy->cgroup_ns);
+       if (IS_ERR(new_nsp->cgroup_ns)) {
+               err = PTR_ERR(new_nsp->cgroup_ns);
+               goto out_cgroup;
+       }
+
         new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
         if (IS_ERR(new_nsp->net_ns)) {
                 err = PTR_ERR(new_nsp->net_ns);
@@ -101,6 +112,8 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
         return new_nsp;
  
  out_net:
+       put_cgroup_ns(new_nsp->cgroup_ns);
+out_cgroup:
         if (new_nsp->pid_ns_for_children)
                 put_pid_ns(new_nsp->pid_ns_for_children);
  out_pid:
@@ -128,7 +141,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
         struct nsproxy *new_ns;
  
         if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-                             CLONE_NEWPID | CLONE_NEWNET)))) {
+                             CLONE_NEWPID | CLONE_NEWNET |
+                             CLONE_NEWCGROUP)))) {
                 get_nsproxy(old_ns);
                 return 0;
         }
@@ -165,6 +179,7 @@ void free_nsproxy(struct nsproxy *ns)
                 put_ipc_ns(ns->ipc_ns);
         if (ns->pid_ns_for_children)
                 put_pid_ns(ns->pid_ns_for_children);
+       put_cgroup_ns(ns->cgroup_ns);
         put_net(ns->net_ns);
         kmem_cache_free(nsproxy_cachep, ns);
  }
@@ -180,7 +195,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
         int err = 0;
  
         if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-                              CLONE_NEWNET | CLONE_NEWPID)))
+                              CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP)))
                 return 0;
  
         user_ns = new_cred ? new_cred->user_ns : current_user_ns();
author	Aditya Kali <adityakali@google.com>
	Fri, 29 Jan 2016 08:54:06 +0000 (02:54 -0600)
committer	Tejun Heo <tj@kernel.org>
	Tue, 16 Feb 2016 18:04:58 +0000 (13:04 -0500)
fs/proc/namespaces.c		patch \| blob \| history
include/linux/cgroup.h		patch \| blob \| history
include/linux/nsproxy.h		patch \| blob \| history
include/linux/proc_ns.h		patch \| blob \| history
kernel/cgroup.c		patch \| blob \| history
kernel/cpuset.c		patch \| blob \| history
kernel/fork.c		patch \| blob \| history
kernel/nsproxy.c		patch \| blob \| history