[IA64] Optimize ticket spinlocks in fsys_rt_sigprocmask
author    Petr Tesarik <ptesarik@suse.cz>
          Wed, 15 Sep 2010 22:35:48 +0000 (15:35 -0700)
committer Tony Luck <tony.luck@intel.com>
          Wed, 15 Sep 2010 22:35:48 +0000 (15:35 -0700)
Tony's fix (f574c843191728d9407b766a027f779dcd27b272) has a small bug:
it incorrectly uses "r3" as a scratch register in the first of the two
unlock paths, and it is also inefficient.  Optimize the fast path again.
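
For reference, a rough C sketch (illustrative only, not the kernel's
actual __ticket_spin_trylock/__ticket_spin_unlock inlines) of the
fast-path logic the patch implements.  The field layout is inferred from
the extr.u/dep.z operands in the diff: the low 15 bits of the 32-bit lock
word hold the next ticket to hand out, bits 17..31 hold the "now serving"
number.  The key change is that the trylock already computes the
half-word the unlock will store (kept in r20 by the patch), so both
unlock sites collapse to a single st2.rel.

  /*
   * Sketch only -- plain C with GCC atomic builtins, little-endian
   * layout assumed (as on ia64).  The sketch_* names are illustrative.
   */
  #include <stdint.h>
  #include <stdbool.h>

  #define TICKET_BITS	15
  #define TICKET_MASK	((1u << TICKET_BITS) - 1)
  #define SERVE_SHIFT	17

  struct sketch_lock {
  	uint32_t word;		/* [31:17] now serving, [14:0] next ticket */
  };

  /*
   * Try to take the lock.  On success, also precompute the half-word
   * the unlock will store (what the patch keeps in r20).
   */
  static bool sketch_trylock(struct sketch_lock *lock, uint16_t *unlock_val)
  {
  	uint32_t old = __atomic_load_n(&lock->word, __ATOMIC_RELAXED);
  	uint32_t ticket  = old & TICKET_MASK;			/* extr.u r18=r17,0,15  */
  	uint32_t serving = (old >> SERVE_SHIFT) & TICKET_MASK;	/* extr.u r9=r17,17,15  */

  	if (serving != ticket)		/* cmp.eq p6,p7=r9,r18 -> contention */
  		return false;

  	/* cmpxchg4.acq: hand out the next ticket by bumping the low field */
  	if (!__atomic_compare_exchange_n(&lock->word, &old, old + 1, false,
  					 __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
  		return false;

  	/*
  	 * dep.z r20=r19,1,15: ((ticket + 1) & mask) << 1 is exactly the
  	 * 16-bit value the unlock writes at byte offset 2 of the lock
  	 * word, i.e. the new "now serving" number with bit 16 cleared.
  	 */
  	*unlock_val = (uint16_t)(((ticket + 1) & TICKET_MASK) << 1);
  	return true;
  }

  /*
   * Unlock is a single release store of the precomputed half-word
   * (st2.rel [r31]=r20); the old code had to load, add 2 and mask at
   * unlock time, using r2/r3 as scratch.
   */
  static void sketch_unlock(struct sketch_lock *lock, uint16_t unlock_val)
  {
  	uint16_t *serving = (uint16_t *)&lock->word + 1;	/* adds r31=2,r31 */
  	__atomic_store_n(serving, unlock_val, __ATOMIC_RELEASE);
  }

Precomputing the unlock value in the trylock path also lets the
.sig_pending error path reuse the same single store.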

Signed-off-by: Petr Tesarik <ptesarik@suse.cz>
Signed-off-by: Tony Luck <tony.luck@intel.com>
arch/ia64/kernel/fsys.S

diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 471a1e783acacb799d0f58538637cde7bee55f1a..331d42bda77ae97f457b13f970aa83c40e11d4b0 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -420,34 +420,31 @@ EX(.fail_efault, ld8 r14=[r33])                   // r14 <- *set
        ;;
 
        RSM_PSR_I(p0, r18, r19)                 // mask interrupt delivery
-       mov ar.ccv=0
        andcm r14=r14,r17                       // filter out SIGKILL & SIGSTOP
+       mov r8=EINVAL                   // default to EINVAL
 
 #ifdef CONFIG_SMP
        // __ticket_spin_trylock(r31)
        ld4 r17=[r31]
-       mov r8=EINVAL                   // default to EINVAL
-       ;;
-       extr r9=r17,17,15
        ;;
-       xor r18=r17,r9
+       mov.m ar.ccv=r17
+       extr.u r9=r17,17,15
        adds r19=1,r17
+       extr.u r18=r17,0,15
        ;;
-       extr.u r18=r18,0,15
+       cmp.eq p6,p7=r9,r18
        ;;
-       cmp.eq p0,p7=0,r18
+(p6)   cmpxchg4.acq r9=[r31],r19,ar.ccv
+(p6)   dep.z r20=r19,1,15              // next serving ticket for unlock
 (p7)   br.cond.spnt.many .lock_contention
-       mov.m ar.ccv=r17
-       ;;
-       cmpxchg4.acq r9=[r31],r19,ar.ccv
        ;;
        cmp4.eq p0,p7=r9,r17
+       adds r31=2,r31
 (p7)   br.cond.spnt.many .lock_contention
        ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
        ;;
 #else
        ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
-       mov r8=EINVAL                   // default to EINVAL
 #endif
        add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
        add r19=IA64_TASK_SIGNAL_OFFSET,r16
@@ -503,16 +500,8 @@ EX(.fail_efault, ld8 r14=[r33])                    // r14 <- *set
 
 #ifdef CONFIG_SMP
        // __ticket_spin_unlock(r31)
-       adds r31=2,r31
-       ;;
-       ld2.bias r2=[r31]
-       mov r3=65534
-       ;;
-       adds r2=2,r2
-       ;;
-       and r3=r3,r2
-       ;;
-       st2.rel [r31]=r3
+       st2.rel [r31]=r20
+       mov r20=0                                       // i must not leak kernel bits...
 #endif
        SSM_PSR_I(p0, p9, r31)
        ;;
@@ -535,16 +524,7 @@ EX(.fail_efault, (p15) st8 [r34]=r3)
 .sig_pending:
 #ifdef CONFIG_SMP
        // __ticket_spin_unlock(r31)
-       adds r31=2,r31
-       ;;
-       ld2.bias r2=[r31]
-       mov r3=65534
-       ;;
-       adds r2=2,r2
-       ;;
-       and r3=r3,r2
-       ;;
-       st2.rel [r31]=r3
+       st2.rel [r31]=r20                       // release the lock
 #endif
        SSM_PSR_I(p0, p9, r17)
        ;;