sparc64: speed up etrap/rtrap on NG2 and later processors
author Anthony Yznaga <anthony.yznaga@oracle.com>
Fri, 18 Aug 2017 19:40:36 +0000 (12:40 -0700)
committer David S. Miller <davem@davemloft.net>
Sun, 10 Sep 2017 03:20:11 +0000 (20:20 -0700)
For many sun4v processor types, reading or writing a privileged register
has a latency of 40 to 70 cycles.  Use a combination of the low-latency
allclean, otherw, normalw, and nop instructions in etrap and rtrap to
replace 2 rdpr and 5 wrpr instructions and improve etrap/rtrap
performance.  allclean, otherw, and normalw are available on NG2 and
later processors.

The average number of ticks needed to execute the flush windows trap
("ta 0x3"), with and without this patch, on select platforms:

 CPU            Not patched     Patched    % Latency Change

 NG2            1762            1558            -11.58
 NG4            3619            3204            -11.47
 M7             3015            2624            -12.97
 SPARC64-X      829             770              -7.12

Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
arch/sparc/include/asm/trap_block.h
arch/sparc/kernel/etrap_64.S
arch/sparc/kernel/rtrap_64.S
arch/sparc/kernel/setup_64.c
arch/sparc/kernel/vmlinux.lds.S

index ff05992dae7a352597bf99fcd2d83500ad0c2995..dfc538609eb2d570ce7eb20cbce5a7bd9e99366d 100644 (file)
@@ -73,6 +73,8 @@ struct sun4v_1insn_patch_entry {
 };
 extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch,
        __sun4v_1insn_patch_end;
+extern struct sun4v_1insn_patch_entry __fast_win_ctrl_1insn_patch,
+       __fast_win_ctrl_1insn_patch_end;
 
 struct sun4v_2insn_patch_entry {
        unsigned int    addr;
index 1276ca2567bab310771d214baa77876ea9e43f0e..5c237467d156f550740ce4ec3723bebf28976940 100644 (file)
@@ -38,7 +38,11 @@ etrap_syscall:       TRAP_LOAD_THREAD_REG(%g6, %g1)
                or      %g1, %g3, %g1
                bne,pn  %xcc, 1f
                 sub    %sp, STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS, %g2
-               wrpr    %g0, 7, %cleanwin
+661:           wrpr    %g0, 7, %cleanwin
+               .section .fast_win_ctrl_1insn_patch, "ax"
+               .word   661b
+               .word   0x85880000      ! allclean
+               .previous
 
                sethi   %hi(TASK_REGOFF), %g2
                sethi   %hi(TSTATE_PEF), %g3
@@ -88,16 +92,30 @@ etrap_save: save    %g2, -STACK_BIAS, %sp
 
                bne,pn  %xcc, 3f
                 mov    PRIMARY_CONTEXT, %l4
-               rdpr    %canrestore, %g3
+661:           rdpr    %canrestore, %g3
+               .section .fast_win_ctrl_1insn_patch, "ax"
+               .word   661b
+               nop
+               .previous
+
                rdpr    %wstate, %g2
-               wrpr    %g0, 0, %canrestore
+661:           wrpr    %g0, 0, %canrestore
+               .section .fast_win_ctrl_1insn_patch, "ax"
+               .word   661b
+               nop
+               .previous
                sll     %g2, 3, %g2
 
                /* Set TI_SYS_FPDEPTH to 1 and clear TI_SYS_NOERROR.  */
                mov     1, %l5
                sth     %l5, [%l6 + TI_SYS_NOERROR]
 
-               wrpr    %g3, 0, %otherwin
+661:           wrpr    %g3, 0, %otherwin
+               .section .fast_win_ctrl_1insn_patch, "ax"
+               .word   661b
+               .word   0x87880000      ! otherw
+               .previous
+
                wrpr    %g2, 0, %wstate
                sethi   %hi(sparc64_kern_pri_context), %g2
                ldx     [%g2 + %lo(sparc64_kern_pri_context)], %g3
index 709a82ebd294c07bd4b87698c695325e5093a7e4..dff86fad0a1fde57548da37f62a2294714013230 100644 (file)
@@ -224,10 +224,19 @@ rt_continue:      ldx                     [%sp + PTREGS_OFF + PT_V9_G1], %g1
                rdpr                    %otherwin, %l2
                srl                     %l1, 3, %l1
 
-               wrpr                    %l2, %g0, %canrestore
+661:           wrpr                    %l2, %g0, %canrestore
+               .section                .fast_win_ctrl_1insn_patch, "ax"
+               .word                   661b
+               .word                   0x89880000      ! normalw
+               .previous
+
                wrpr                    %l1, %g0, %wstate
                brnz,pt                 %l2, user_rtt_restore
-                wrpr                   %g0, %g0, %otherwin
+661:            wrpr                   %g0, %g0, %otherwin
+               .section                .fast_win_ctrl_1insn_patch, "ax"
+               .word                   661b
+                nop
+               .previous
 
                ldx                     [%g6 + TI_FLAGS], %g3
                wr                      %g0, ASI_AIUP, %asi
index c4088a3b10519e07df35b4e32b8ec5faf01353b6..db4c4d7e28a07a040fd48b39a3698198d41ddd42 100644 (file)
@@ -300,6 +300,11 @@ static void __init sun4v_patch(void)
                break;
        }
 
+       if (sun4v_chip_type != SUN4V_CHIP_NIAGARA1) {
+               sun4v_patch_1insn_range(&__fast_win_ctrl_1insn_patch,
+                                       &__fast_win_ctrl_1insn_patch_end);
+       }
+
        sun4v_hvapi_init();
 }
 
index 34d37e6c2d065aebb0ee0c9dd714dc6785398316..d78847d56a4b492f1ceee02be2fb1acac94c7134 100644 (file)
@@ -159,6 +159,11 @@ SECTIONS
                *(.pud_huge_patch)
                __pud_huge_patch_end = .;
        }
+       .fast_win_ctrl_1insn_patch : {
+               __fast_win_ctrl_1insn_patch = .;
+               *(.fast_win_ctrl_1insn_patch)
+               __fast_win_ctrl_1insn_patch_end = .;
+       }
        PERCPU_SECTION(SMP_CACHE_BYTES)
 
 #ifdef CONFIG_JUMP_LABEL