pid namespaces: define is_global_init() and is_container_init()
author Serge E. Hallyn <serue@us.ibm.com>
Fri, 19 Oct 2007 06:39:52 +0000 (23:39 -0700)
committer Linus Torvalds <torvalds@woody.linux-foundation.org>
Fri, 19 Oct 2007 18:53:37 +0000 (11:53 -0700)
is_init() is an ambiguous name for the pid==1 check.  Split it into
is_global_init() and is_container_init().

A cgroup init has its tsk->pid == 1.

A global init also has its tsk->pid == 1 and its active pid namespace
is the init_pid_ns.  But rather than check the active pid namespace,
compare the task structure with 'init_pid_ns.child_reaper', which is
initialized during boot to the /sbin/init process and never changes.

Changelog:

2.6.22-rc4-mm2-pidns1:
- Use 'init_pid_ns.child_reaper' to determine if a given task is the
  global init (/sbin/init) process. This would improve performance
  and remove dependence on the task_pid().

2.6.21-mm2-pidns2:

- [Sukadev Bhattiprolu] Changed is_container_init() calls in {powerpc,
  ppc,avr32}/traps.c for the _exception() call to is_global_init().
  This way, we kill only the cgroup if the cgroup's init has a
  bug rather than force a kernel panic.

[akpm@linux-foundation.org: fix comment]
[sukadev@us.ibm.com: Use is_global_init() in arch/m32r/mm/fault.c]
[bunk@stusta.de: kernel/pid.c: remove unused exports]
[sukadev@us.ibm.com: Fix capability.c to work with threaded init]
Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Acked-by: Pavel Emelianov <xemul@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Herbert Poetzel <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
32 files changed:
arch/alpha/mm/fault.c
arch/arm/mm/fault.c
arch/avr32/kernel/traps.c
arch/avr32/mm/fault.c
arch/ia64/mm/fault.c
arch/m32r/mm/fault.c
arch/m68k/mm/fault.c
arch/mips/mm/fault.c
arch/powerpc/kernel/traps.c
arch/powerpc/mm/fault.c
arch/powerpc/platforms/pseries/ras.c
arch/ppc/kernel/traps.c
arch/ppc/mm/fault.c
arch/s390/lib/uaccess_pt.c
arch/s390/mm/fault.c
arch/sh/mm/fault.c
arch/sh64/mm/fault.c
arch/um/kernel/trap.c
arch/x86/lib/usercopy_32.c
arch/x86/mm/fault_32.c
arch/x86/mm/fault_64.c
arch/xtensa/mm/fault.c
drivers/char/sysrq.c
include/linux/sched.h
kernel/capability.c
kernel/exit.c
kernel/kexec.c
kernel/pid.c
kernel/signal.c
kernel/sysctl.c
mm/oom_kill.c
security/commoncap.c

index 25154df3055abf7a501db5b5b00255d0bcbb410e..e0593e606140057ef7da165763b71e7c47491837 100644 (file)
@@ -188,7 +188,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
        /* We ran out of memory, or some other thing happened to us that
           made us unable to handle the page fault gracefully.  */
  out_of_memory:
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index 59ed1d05b71bf6d858e515d928da4ad102276d91..a8a7dab757eb4977bfc88201da37dfe85713cdbc 100644 (file)
@@ -197,7 +197,7 @@ survive:
        return fault;
 
 out_of_memory:
-       if (!is_init(tsk))
+       if (!is_global_init(tsk))
                goto out;
 
        /*
index 9a73ce7eb50fb1da8e6c04ca41c327108b602b6f..8a7caf8e7b454da39a915426a1d7ea363faf361f 100644 (file)
@@ -89,7 +89,7 @@ void _exception(long signr, struct pt_regs *regs, int code,
         * generate the same exception over and over again and we get
         * nowhere.  Better to kill it and let the kernel panic.
         */
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                __sighandler_t handler;
 
                spin_lock_irq(&current->sighand->siglock);
index 11472f8701bdf4a88202d81e723efef38208e743..6560cb18b4e3403e498cb9076ea7025b61cbb2d0 100644 (file)
@@ -160,7 +160,7 @@ bad_area:
                if (exception_trace && printk_ratelimit())
                        printk("%s%s[%d]: segfault at %08lx pc %08lx "
                               "sp %08lx ecr %lu\n",
-                              is_init(tsk) ? KERN_EMERG : KERN_INFO,
+                              is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
                               tsk->comm, tsk->pid, address, regs->pc,
                               regs->sp, ecr);
                _exception(SIGSEGV, regs, code, address);
@@ -209,7 +209,7 @@ no_context:
         */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
@@ -231,7 +231,7 @@ do_sigbus:
        if (exception_trace)
                printk("%s%s[%d]: bus error at %08lx pc %08lx "
                       "sp %08lx ecr %lu\n",
-                      is_init(tsk) ? KERN_EMERG : KERN_INFO,
+                      is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
                       tsk->comm, tsk->pid, address, regs->pc,
                       regs->sp, ecr);
 
index 32f26253c4e8cea6b938e8babdffb96e826e6ac8..7571076a16a1991742af9f502b2113f7d9bd35a4 100644 (file)
@@ -274,7 +274,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 
   out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index 70a766aad3e0bdbcdc22b6dfd11033d3183846aa..4a71df4c1b3022b1c8f324b50d78dd2b79f8e51a 100644 (file)
@@ -271,7 +271,7 @@ no_context:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(tsk)) {
+       if (is_global_init(tsk)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index eaa618681159524ec1cf287b2ff1c91bf72d2da7..f493f03231d5ad3594d0813bd11a8855e9efcfbb 100644 (file)
@@ -180,7 +180,7 @@ good_area:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index 5699c7713e2f01c5b42f7803df82c61c4b48ec8c..fa636fc6b7b90403a2a8771ef3c5f023a1a82cd4 100644 (file)
@@ -173,7 +173,7 @@ no_context:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(tsk)) {
+       if (is_global_init(tsk)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index bf9e39c6e296209f0f6bc8f5f08c35286df279d4..9fb4a6849c5a591c2e7aa7edb3452029f69d8190 100644 (file)
@@ -201,7 +201,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
         * generate the same exception over and over again and we get
         * nowhere.  Better to kill it and let the kernel panic.
         */
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                __sighandler_t handler;
 
                spin_lock_irq(&current->sighand->siglock);
index ab3546c5ac3a2a8f4b022ba8150c13a9c7686728..a18fda361cc0fe19103add8034d5b3485b012079 100644 (file)
@@ -375,7 +375,7 @@ bad_area_nosemaphore:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index 3a393c7f390e1cce07e0ee0d1c4c7bfc24284057..a1ab25c7082f1031343d492445c37ce3845580dd 100644 (file)
@@ -332,7 +332,7 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err)
                   err->disposition == RTAS_DISP_NOT_RECOVERED &&
                   err->target == RTAS_TARGET_MEMORY &&
                   err->type == RTAS_TYPE_ECC_UNCORR &&
-                  !(current->pid == 0 || is_init(current))) {
+                  !(current->pid == 0 || is_global_init(current))) {
                /* Kill off a user process with an ECC error */
                printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
                       current->pid);
index 3f3b292eb773ba80d2d3f79bdfd231bbcea95ecb..c78568905c3b1c2b7676446748e57774a737399a 100644 (file)
@@ -121,7 +121,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
         * generate the same exception over and over again and we get
         * nowhere.  Better to kill it and let the kernel panic.
         */
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                __sighandler_t handler;
 
                spin_lock_irq(&current->sighand->siglock);
index 94913ddcf76e8524392dec4f5d9b53bccbc2011f..254c23b755e689006a7073a13a57cbf35db4643b 100644 (file)
@@ -290,7 +290,7 @@ bad_area:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index 60604b2819b2a0d07c4664860ecb4c462ee92ee1..b159a9d656807f9283102e5cbeee2d9920ab9b27 100644 (file)
@@ -64,7 +64,7 @@ out:
 
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index 14c241ccdd4d9aa47c8697cdb4f9492821c1e224..2456b52ed0687e4ac588fa47ae860cc0973636fa 100644 (file)
@@ -211,7 +211,7 @@ static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code,
        struct mm_struct *mm = tsk->mm;
 
        up_read(&mm->mmap_sem);
-       if (is_init(tsk)) {
+       if (is_global_init(tsk)) {
                yield();
                down_read(&mm->mmap_sem);
                return 1;
index 4729668ce5bf3b4ca261d689906ed14db30e01e5..f33cedb353fc6af215bc7f86b37120553d8e7532 100644 (file)
@@ -207,7 +207,7 @@ no_context:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index dd81c669c79b5b63df7c52447f320c96d8a15dd4..7aea586fc3d0962a6c1bf9e22f29863d26454030 100644 (file)
@@ -278,7 +278,7 @@ bad_area:
                        show_regs(regs);
 #endif
                }
-               if (is_init(tsk)) {
+               if (is_global_init(tsk)) {
                        panic("INIT had user mode bad_area\n");
                }
                tsk->thread.address = address;
@@ -320,14 +320,14 @@ no_context:
  * us unable to handle the page fault gracefully.
  */
 out_of_memory:
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                panic("INIT out of memory\n");
                yield();
                goto survive;
        }
        printk("fault:Out of memory\n");
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index bd060551e6190d6fc92b2148840f94c9e71098c9..cb3321f8e0a91924167b0f83ea7f34c30df55080 100644 (file)
@@ -108,7 +108,7 @@ out_nosemaphore:
  * us unable to handle the page fault gracefully.
  */
 out_of_memory:
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                up_read(&mm->mmap_sem);
                yield();
                down_read(&mm->mmap_sem);
index 9f38b12b4af1bf103b1261b1ceee185c541a28cd..8bab2b2efaff86c2707ccc929b996f5a245cecd5 100644 (file)
@@ -748,7 +748,7 @@ survive:
                        retval = get_user_pages(current, current->mm,
                                        (unsigned long )to, 1, 1, 0, &pg, NULL);
 
-                       if (retval == -ENOMEM && is_init(current)) {
+                       if (retval == -ENOMEM && is_global_init(current)) {
                                up_read(&current->mm->mmap_sem);
                                congestion_wait(WRITE, HZ/50);
                                goto survive;
index 6555c3d143716c74dc97d466ffafd1baf1399f35..4fc5e400cf0ab82a3f7769aa1d7e4aea1a95c214 100644 (file)
@@ -587,7 +587,7 @@ no_context:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(tsk)) {
+       if (is_global_init(tsk)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index 5e0e54906c488fe5b7266d013c072672a606ba29..5149ac136a5db0bcc25330b86b188b71e570b858 100644 (file)
@@ -554,7 +554,7 @@ no_context:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                goto again;
        }
index 2f842859948f5993780b83fae5d3e66390bf1d6d..33f366be323fc05f075519a0dde7826795f8c568 100644 (file)
@@ -145,7 +145,7 @@ bad_area:
         */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index 78d14935f2b8c4b1dbb4678f0d918858c6a15832..de60e1ea4fb3830e01524b73381738d7e98485b6 100644 (file)
@@ -251,7 +251,7 @@ static void send_sig_all(int sig)
        struct task_struct *p;
 
        for_each_process(p) {
-               if (p->mm && !is_init(p))
+               if (p->mm && !is_global_init(p))
                        /* Not swapper, init nor kernel thread */
                        force_sig(sig, p);
        }
index df6049e5e8bfeef147e68aa212824904813ff007..47cf81d6204782693d9f7f36d0de2f3b518c7169 100644 (file)
@@ -1237,12 +1237,20 @@ static inline int pid_alive(struct task_struct *p)
 }
 
 /**
- * is_init - check if a task structure is init
+ * is_global_init - check if a task structure is init
  * @tsk: Task structure to be checked.
  *
  * Check if a task structure is the first user space task the kernel created.
+ *
+ * TODO: We should inline this function after some cleanups in pid_namespace.h
+ */
+extern int is_global_init(struct task_struct *tsk);
+
+/*
+ * is_container_init:
+ * check whether the task is init in its own pid namespace.
  */
-static inline int is_init(struct task_struct *tsk)
+static inline int is_container_init(struct task_struct *tsk)
 {
        return tsk->pid == 1;
 }
index cbc5fd60c0f318dce6c1cdcd33b4cf5b5d37f6a1..f02ad47320b92c9536d2c287f2bfb6a72c8a1ef2 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/pid_namespace.h>
 #include <asm/uaccess.h>
 
 /*
@@ -129,7 +130,7 @@ static inline int cap_set_all(kernel_cap_t *effective,
      int found = 0;
 
      do_each_thread(g, target) {
-             if (target == current || is_init(target))
+             if (target == current || is_container_init(target->group_leader))
                      continue;
              found = 1;
             if (security_capset_check(target, effective, inheritable,
index d1eddc753fe3df0db57dc13aa52696de6408eec9..d22aefabb1294600248aac455ea6128f6e6fa73f 100644 (file)
@@ -221,7 +221,7 @@ static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignor
        do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
                if (p == ignored_task
                                || p->exit_state
-                               || is_init(p->real_parent))
+                               || is_global_init(p->real_parent))
                        continue;
                if (task_pgrp(p->real_parent) != pgrp &&
                    task_session(p->real_parent) == task_session(p)) {
index e9f1b4ea504d920bd514deb8868536fd10de77bb..fbffdb457cce0e6f44cbef31285ec9de3e68f686 100644 (file)
@@ -51,7 +51,7 @@ struct resource crashk_res = {
 
 int kexec_should_crash(struct task_struct *p)
 {
-       if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops)
+       if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
                return 1;
        return 0;
 }
index 78c0dbffde654c22e4f6c14482e6aaf3f89e1147..bb0785109d39998c63ec881bb4f92fed69efaa87 100644 (file)
@@ -70,6 +70,11 @@ struct pid_namespace init_pid_ns = {
        .child_reaper = &init_task
 };
 
+int is_global_init(struct task_struct *tsk)
+{
+       return tsk == init_pid_ns.child_reaper;
+}
+
 /*
  * Note: disable interrupts while the pidmap_lock is held as an
  * interrupt might come in and do read_lock(&tasklist_lock).
index 0a6d3726cb80bd960f468492723438608b4a0ea1..8214ffad54bcebbbe63b639115d0bd983ba55101 100644 (file)
@@ -256,7 +256,7 @@ flush_signal_handlers(struct task_struct *t, int force_default)
 
 int unhandled_signal(struct task_struct *tsk, int sig)
 {
-       if (is_init(tsk))
+       if (is_global_init(tsk))
                return 1;
        if (tsk->ptrace & PT_PTRACED)
                return 0;
index 067554bda8b79ba3954f2dc653dfa4f25f98f09c..44868e4df1d3deeea567c62a7c7c17e0251931b2 100644 (file)
@@ -1888,7 +1888,7 @@ int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
                return -EPERM;
        }
 
-       op = is_init(current) ? OP_SET : OP_AND;
+       op = is_global_init(current) ? OP_SET : OP_AND;
        return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
                                do_proc_dointvec_bset_conv,&op);
 }
index a64decb5b13fb3922e05a70559855dde685b57b4..b1c2d0f862221fbf7c00aab0e6200745dd5ab805 100644 (file)
@@ -212,7 +212,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
                if (!p->mm)
                        continue;
                /* skip the init task */
-               if (is_init(p))
+               if (is_global_init(p))
                        continue;
 
                /*
@@ -265,7 +265,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
  */
 static void __oom_kill_task(struct task_struct *p, int verbose)
 {
-       if (is_init(p)) {
+       if (is_global_init(p)) {
                WARN_ON(1);
                printk(KERN_WARNING "tried to kill init!\n");
                return;
index 48ca5b092768fdde28e1df98b6420e47af0e82b1..43f902750a1b6c802d650b6e9bbafde70ec8ae70 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/xattr.h>
 #include <linux/hugetlb.h>
 #include <linux/mount.h>
+#include <linux/sched.h>
 
 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 /*
@@ -334,7 +335,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
        /* For init, we want to retain the capabilities set
         * in the init_task struct. Thus we skip the usual
         * capability rules */
-       if (!is_init(current)) {
+       if (!is_global_init(current)) {
                current->cap_permitted = new_permitted;
                current->cap_effective = bprm->cap_effective ?
                                new_permitted : 0;