s390: implement memset16, memset32 & memset64
author Heiko Carstens <heiko.carstens@de.ibm.com>
Wed, 4 Oct 2017 17:27:05 +0000 (19:27 +0200)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
Mon, 9 Oct 2017 09:18:04 +0000 (11:18 +0200)
Provide fast versions of the new memset variants. E.g. the generic
memset64 is ten times slower than the optimized version if used on a
whole page.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
arch/s390/include/asm/string.h
arch/s390/lib/mem.S

index 8fb43319693db17f21dde49624784c7a0cd9698d..aa9c3a0f59ff418955261433e003685dae269800 100644 (file)
@@ -17,6 +17,9 @@
 #define __HAVE_ARCH_MEMMOVE    /* gcc builtin & arch function */
 #define __HAVE_ARCH_MEMSCAN    /* inline & arch function */
 #define __HAVE_ARCH_MEMSET     /* gcc builtin & arch function */
+#define __HAVE_ARCH_MEMSET16   /* arch function */
+#define __HAVE_ARCH_MEMSET32   /* arch function */
+#define __HAVE_ARCH_MEMSET64   /* arch function */
 #define __HAVE_ARCH_STRCAT     /* inline & arch function */
 #define __HAVE_ARCH_STRCMP     /* arch function */
 #define __HAVE_ARCH_STRCPY     /* inline & arch function */
@@ -49,6 +52,25 @@ extern char *strstr(const char *, const char *);
 #undef __HAVE_ARCH_STRSEP
 #undef __HAVE_ARCH_STRSPN
 
+void *__memset16(uint16_t *s, uint16_t v, size_t count); /* arch helper; the inline wrappers pass count as a size in bytes */
+void *__memset32(uint32_t *s, uint32_t v, size_t count); /* same contract with a 32-bit pattern */
+void *__memset64(uint64_t *s, uint64_t v, size_t count); /* same contract with a 64-bit pattern */
+
+static inline void *memset16(uint16_t *s, uint16_t v, size_t count) /* fill count 16-bit elements at s with v */
+{
+       return __memset16(s, v, count * sizeof(v)); /* scale the element count to bytes for the arch helper; its result is returned as is */
+}
+
+static inline void *memset32(uint32_t *s, uint32_t v, size_t count) /* fill count 32-bit elements at s with v */
+{
+       return __memset32(s, v, count * sizeof(v)); /* scale the element count to bytes for the arch helper; its result is returned as is */
+}
+
+static inline void *memset64(uint64_t *s, uint64_t v, size_t count) /* fill count 64-bit elements at s with v */
+{
+       return __memset64(s, v, count * sizeof(v)); /* scale the element count to bytes for the arch helper; its result is returned as is */
+}
+
 #if !defined(IN_ARCH_STRING_C) && (!defined(CONFIG_FORTIFY_SOURCE) || defined(__NO_FORTIFY))
 
 static inline void *memchr(const void * s, int c, size_t n)
index 7ff79a4ff00cd9fae70d83f4674f6f8a01033967..f88cf6983849bb30f0b2b9d61b434ad36b8e7a79 100644 (file)
@@ -126,3 +126,47 @@ ENTRY(memcpy)
 .Lmemcpy_mvc:
        mvc     0(1,%r1),0(%r3)
 EXPORT_SYMBOL(memcpy)
+
+/*
+ * __memset16/32/64
+ *
+ * void *__memset16(uint16_t *s, uint16_t v, size_t count)
+ * void *__memset32(uint32_t *s, uint32_t v, size_t count)
+ * void *__memset64(uint64_t *s, uint64_t v, size_t count)
+ */
+.macro __MEMSET bits,bytes,insn /* bits: suffix of the generated symbol; bytes: pattern size in bytes; insn: store instruction writing one pattern */
+ENTRY(__memset\bits)
+       ltgr    %r4,%r4 /* test the size in %r4 (a byte count, per the inline C wrappers) */
+       bzr     %r14 /* zero size: return without storing anything */
+       cghi    %r4,\bytes
+       je      .L__memset_exit\bits /* size equals one pattern: a single store is enough */
+       aghi    %r4,-(\bytes+1) /* size minus the seed pattern, minus 1 because the mvc length code is length - 1 */
+       srlg    %r5,%r4,8 /* %r5 = number of full 256-byte blocks */
+       ltgr    %r5,%r5
+       lgr     %r1,%r2 /* advance a copy of the destination so %r2 is left untouched; lgr keeps the condition code set by ltgr */
+       jz      .L__memset_remainder\bits /* no full block: only the tail remains */
+.L__memset_loop\bits:
+       \insn   %r3,0(%r1) /* store one pattern at the start of the block */
+       mvc     \bytes(256-\bytes,%r1),0(%r1) /* overlapping left-to-right copy replicates the pattern over the rest of the 256 bytes */
+       la      %r1,256(%r1) /* step to the next block */
+       brctg   %r5,.L__memset_loop\bits /* decrement the block counter and loop while it is non-zero */
+.L__memset_remainder\bits:
+       \insn   %r3,0(%r1) /* seed pattern for the tail */
+       larl    %r5,.L__memset_mvc\bits /* address of the mvc template below */
+       ex      %r4,0(%r5) /* execute the template with its length code taken from the low byte of %r4 */
+       br      %r14
+.L__memset_exit\bits:
+       \insn   %r3,0(%r2) /* single-pattern case: store directly through the original pointer */
+       br      %r14
+.L__memset_mvc\bits:
+       mvc     \bytes(1,%r1),0(%r1) /* template only: placed after the returns and reached solely through ex */
+.endm
+
+__MEMSET 16,2,sth /* emits __memset16: 2-byte pattern stored with sth */
+EXPORT_SYMBOL(__memset16)
+
+__MEMSET 32,4,st /* emits __memset32: 4-byte pattern stored with st */
+EXPORT_SYMBOL(__memset32)
+
+__MEMSET 64,8,stg /* emits __memset64: 8-byte pattern stored with stg */
+EXPORT_SYMBOL(__memset64)