ia64: switch to generic kernel_thread()/kernel_execve()
author: Al Viro <viro@zeniv.linux.org.uk>
Sun, 14 Oct 2012 19:43:06 +0000 (15:43 -0400)
committer: Al Viro <viro@zeniv.linux.org.uk>
Fri, 19 Oct 2012 18:28:09 +0000 (14:28 -0400)
Acked-by: Tony Luck <tony.luck@intel.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
arch/ia64/Kconfig
arch/ia64/include/asm/processor.h
arch/ia64/kernel/entry.S
arch/ia64/kernel/head.S
arch/ia64/kernel/process.c

index 3279646120e3be5bdbc970ca5297b5fce30011ee..6706004681280ab211e11a8836f71535d4c1bd9f 100644 (file)
@@ -42,6 +42,8 @@ config IA64
        select GENERIC_TIME_VSYSCALL_OLD
        select HAVE_MOD_ARCH_SPECIFIC
        select MODULES_USE_ELF_RELA
+       select GENERIC_KERNEL_THREAD
+       select GENERIC_KERNEL_EXECVE
        default y
        help
          The Itanium Processor Family is Intel's 64-bit successor to
index 944152a5091223798fb3427b2dc5bb56ecaef4be..e0a899a1a8a665c140dba6255dbbde7528a16c79 100644 (file)
@@ -340,22 +340,6 @@ struct task_struct;
  */
 #define release_thread(dead_task)
 
-/*
- * This is the mechanism for creating a new kernel thread.
- *
- * NOTE 1: Only a kernel-only process (ie the swapper or direct
- * descendants who haven't done an "execve()") should use this: it
- * will work within a system call from a "real" process, but the
- * process memory space will not be free'd until both the parent and
- * the child have exited.
- *
- * NOTE 2: This MUST NOT be an inlined function.  Otherwise, we get
- * into trouble in init/main.c when the child thread returns to
- * do_basic_setup() and the timing is such that free_initmem() has
- * been called already.
- */
-extern pid_t kernel_thread (int (*fn)(void *), void *arg, unsigned long flags);
-
 /* Get wait channel for task P.  */
 extern unsigned long get_wchan (struct task_struct *p);
 
index 6b0648d97b4c9abd22fdac8843d732496fc8f15a..0dea684e19053fd06c88bc30c22122c2ac98b2f6 100644 (file)
@@ -484,13 +484,6 @@ GLOBAL_ENTRY(prefetch_stack)
        br.ret.sptk.many rp
 END(prefetch_stack)
 
-GLOBAL_ENTRY(kernel_execve)
-       rum psr.ac
-       mov r15=__NR_execve                     // put syscall number in place
-       break __BREAK_SYSCALL
-       br.ret.sptk.many rp
-END(kernel_execve)
-
        /*
         * Invoke a system call, but do some tracing before and after the call.
         * We MUST preserve the current register frame throughout this routine
@@ -594,6 +587,27 @@ GLOBAL_ENTRY(ia64_strace_leave_kernel)
 .ret4: br.cond.sptk ia64_leave_kernel
 END(ia64_strace_leave_kernel)
 
+ENTRY(call_payload)
+       .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(0)
+       /* call the kernel_thread payload; fn (a function descriptor) is in r4, arg - in r5 */
+       alloc loc1=ar.pfs,0,3,1,0       // keep ar.pfs in loc1; frame: 0 in, 3 local, 1 out
+       mov loc0=rp                     // keep our return pointer across the call
+       mov loc2=gp                     // keep our gp; the payload's gp is loaded below
+       mov out0=r5             // arg
+       ld8 r14 = [r4], 8       // fn.address; r4 is post-incremented to the gp slot
+       ;;
+       mov b6 = r14                    // branch target for the call below
+       ld8 gp = [r4]           // fn.gp
+       ;;
+       br.call.sptk.many rp=b6 // fn(arg)
+.ret12:        mov gp=loc2             // payload returned: restore gp ...
+       mov rp=loc0                     // ... the return pointer ...
+       mov ar.pfs=loc1                 // ... and ar.pfs saved on entry
+       /* ... and if it has returned, we are going to userland */
+       cmp.ne pKStk,pUStk=r0,r0        // a register never differs from itself: pKStk=0, pUStk=1
+       br.ret.sptk.many rp
+END(call_payload)
+
 GLOBAL_ENTRY(ia64_ret_from_clone)
        PT_REGS_UNWIND_INFO(0)
 {      /*
@@ -610,6 +624,7 @@ GLOBAL_ENTRY(ia64_ret_from_clone)
        br.call.sptk.many rp=ia64_invoke_schedule_tail
 }
 .ret8:
+(pKStk)        br.call.sptk.many rp=call_payload
        adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
        ;;
        ld4 r2=[r2]
index 629a250f7c190376c6976d679f427044a0209eda..4738ff7bd66a28e35b260ea3706058fbf56adbda 100644 (file)
@@ -1093,19 +1093,6 @@ GLOBAL_ENTRY(cycle_to_cputime)
 END(cycle_to_cputime)
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
-GLOBAL_ENTRY(start_kernel_thread)
-       .prologue
-       .save rp, r0                            // this is the end of the call-chain
-       .body
-       alloc r2 = ar.pfs, 0, 0, 2, 0
-       mov out0 = r9
-       mov out1 = r11;;
-       br.call.sptk.many rp = kernel_thread_helper;;
-       mov out0 = r8
-       br.call.sptk.many rp = sys_exit;;
-1:     br.sptk.few 1b                          // not reached
-END(start_kernel_thread)
-
 #ifdef CONFIG_IA64_BRL_EMU
 
 /*
index 6a48775d93632c1d7275c409c5a6deff6799ceee..37686dbfd264e11db58070a0235f03e5a733529f 100644 (file)
@@ -401,55 +401,15 @@ copy_thread(unsigned long clone_flags,
        struct pt_regs *child_ptregs;
        int retval = 0;
 
-       stack = ((struct switch_stack *) regs) - 1;
-
        child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
        child_stack = (struct switch_stack *) child_ptregs - 1;
 
-       /* copy parent's switch_stack & pt_regs to child: */
-       memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
-
        rbs = (unsigned long) current + IA64_RBS_OFFSET;
        child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
 
-       if (likely(user_mode(child_ptregs))) {
-               /* copy the parent's register backing store to the child: */
-               rbs_size = stack->ar_bspstore - rbs;
-               memcpy((void *) child_rbs, (void *) rbs, rbs_size);
-               if (clone_flags & CLONE_SETTLS)
-                       child_ptregs->r13 = regs->r16;  /* see sys_clone2() in entry.S */
-               if (user_stack_base) {
-                       child_ptregs->r12 = user_stack_base + user_stack_size - 16;
-                       child_ptregs->ar_bspstore = user_stack_base;
-                       child_ptregs->ar_rnat = 0;
-                       child_ptregs->loadrs = 0;
-               }
-       } else {
-               /*
-                * Note: we simply preserve the relative position of
-                * the stack pointer here.  There is no need to
-                * allocate a scratch area here, since that will have
-                * been taken care of by the caller of sys_clone()
-                * already.
-                */
-               rbs_size = 0;
-               child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* kernel sp */
-               child_ptregs->r13 = (unsigned long) p;          /* set `current' pointer */
-       }
-       child_stack->ar_bspstore = child_rbs + rbs_size;
-       child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
-
        /* copy parts of thread_struct: */
        p->thread.ksp = (unsigned long) child_stack - 16;
 
-       /* stop some PSR bits from being inherited.
-        * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
-        * therefore we must specify them explicitly here and not include them in
-        * IA64_PSR_BITS_TO_CLEAR.
-        */
-       child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
-                                & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
-
        /*
         * NOTE: The calling convention considers all floating point
         * registers in the high partition (fph) to be scratch.  Since
@@ -471,8 +431,66 @@ copy_thread(unsigned long clone_flags,
 #      define THREAD_FLAGS_TO_SET      0
        p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
                           | THREAD_FLAGS_TO_SET);
+
        ia64_drop_fpu(p);       /* don't pick up stale state from a CPU's fph */
 
+       if (unlikely(p->flags & PF_KTHREAD)) {
+               if (unlikely(!user_stack_base)) {
+                       /* fork_idle() called us */
+                       return 0;
+               }
+               memset(child_stack, 0, sizeof(*child_ptregs) + sizeof(*child_stack));
+               child_stack->r4 = user_stack_base;      /* payload */
+               child_stack->r5 = user_stack_size;      /* argument */
+               /*
+                * Preserve PSR bits, except for bits 32-34 and 37-45,
+                * which we can't read.
+                */
+               child_ptregs->cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
+               /* mark as valid, empty frame */
+               child_ptregs->cr_ifs = 1UL << 63;
+               child_stack->ar_fpsr = child_ptregs->ar_fpsr
+                       = ia64_getreg(_IA64_REG_AR_FPSR);
+               child_stack->pr = (1 << PRED_KERNEL_STACK);
+               child_stack->ar_bspstore = child_rbs;
+               child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+               /* stop some PSR bits from being inherited.
+                * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
+                * therefore we must specify them explicitly here and not include them in
+                * IA64_PSR_BITS_TO_CLEAR.
+                */
+               child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+                                & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
+
+               return 0;
+       }
+       stack = ((struct switch_stack *) regs) - 1;
+       /* copy parent's switch_stack & pt_regs to child: */
+       memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
+
+       /* copy the parent's register backing store to the child: */
+       rbs_size = stack->ar_bspstore - rbs;
+       memcpy((void *) child_rbs, (void *) rbs, rbs_size);
+       if (clone_flags & CLONE_SETTLS)
+               child_ptregs->r13 = regs->r16;  /* see sys_clone2() in entry.S */
+       if (user_stack_base) {
+               child_ptregs->r12 = user_stack_base + user_stack_size - 16;
+               child_ptregs->ar_bspstore = user_stack_base;
+               child_ptregs->ar_rnat = 0;
+               child_ptregs->loadrs = 0;
+       }
+       child_stack->ar_bspstore = child_rbs + rbs_size;
+       child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+       /* stop some PSR bits from being inherited.
+        * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
+        * therefore we must specify them explicitly here and not include them in
+        * IA64_PSR_BITS_TO_CLEAR.
+        */
+       child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+                                & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
+
 #ifdef CONFIG_PERFMON
        if (current->thread.pfm_context)
                pfm_inherit(p, child_ptregs);
@@ -618,37 +636,6 @@ out:
        return error;
 }
 
-pid_t
-kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
-{
-       extern void start_kernel_thread (void);
-       unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;
-       struct {
-               struct switch_stack sw;
-               struct pt_regs pt;
-       } regs;
-
-       memset(&regs, 0, sizeof(regs));
-       regs.pt.cr_iip = helper_fptr[0];        /* set entry point (IP) */
-       regs.pt.r1 = helper_fptr[1];            /* set GP */
-       regs.pt.r9 = (unsigned long) fn;        /* 1st argument */
-       regs.pt.r11 = (unsigned long) arg;      /* 2nd argument */
-       /* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read.  */
-       regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
-       regs.pt.cr_ifs = 1UL << 63;             /* mark as valid, empty frame */
-       regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
-       regs.sw.pr = (1 << PRED_KERNEL_STACK);
-       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/* This gets called from kernel_thread() via ia64_invoke_thread_helper().  */
-int
-kernel_thread_helper (int (*fn)(void *), void *arg)
-{
-       return (*fn)(arg);
-}
-
 /*
  * Flush thread state.  This is called when a thread does an execve().
  */