x86, mem: memmove_64.S: Optimize memmove by enhanced REP MOVSB/STOSB
author Fenghua Yu <fenghua.yu@intel.com>
Tue, 17 May 2011 22:29:17 +0000 (15:29 -0700)
committer H. Peter Anvin <hpa@linux.intel.com>
Tue, 17 May 2011 22:40:30 +0000 (15:40 -0700)
Support memmove() by enhanced rep movsb. On processors supporting enhanced
REP MOVSB/STOSB, the alternative memmove() function using enhanced rep movsb
overrides the original function.

The patch doesn't change the backward memmove case to use enhanced rep
movsb.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-9-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
arch/x86/lib/memmove_64.S

index 0ecb8433e5a8e2f62c0a678da69ccd16f233b304..d0ec9c2936d75fb6e7c908c00f2359fd0c366dd5 100644 (file)
@@ -8,6 +8,7 @@
 #define _STRING_C
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 
 #undef memmove
 
@@ -24,6 +25,7 @@
  */
 ENTRY(memmove)
        CFI_STARTPROC
+
        /* Handle more 32bytes in loop */
        mov %rdi, %rax
        cmp $0x20, %rdx
@@ -31,8 +33,13 @@ ENTRY(memmove)
 
        /* Decide forward/backward copy mode */
        cmp %rdi, %rsi
-       jb      2f
+       jge .Lmemmove_begin_forward
+       mov %rsi, %r8
+       add %rdx, %r8
+       cmp %rdi, %r8
+       jg 2f
 
+.Lmemmove_begin_forward:
        /*
         * movsq instruction have many startup latency
         * so we handle small size by general register.
@@ -78,6 +85,8 @@ ENTRY(memmove)
        rep movsq
        movq %r11, (%r10)
        jmp 13f
+.Lmemmove_end_forward:
+
        /*
         * Handle data backward by movsq.
         */
@@ -194,4 +203,22 @@ ENTRY(memmove)
 13:
        retq
        CFI_ENDPROC
+
+       .section .altinstr_replacement,"ax"
+.Lmemmove_begin_forward_efs:
+       /* Forward moving data. */
+       movq %rdx, %rcx
+       rep movsb
+       retq
+.Lmemmove_end_forward_efs:
+       .previous
+
+       .section .altinstructions,"a"
+       .align 8
+       .quad .Lmemmove_begin_forward
+       .quad .Lmemmove_begin_forward_efs
+       .word X86_FEATURE_ERMS
+       .byte .Lmemmove_end_forward-.Lmemmove_begin_forward
+       .byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
+       .previous
 ENDPROC(memmove)