parisc: Add assembly implementations for memset, strlen, strcpy, strncpy and strcat
author Helge Deller <deller@gmx.de>
Wed, 6 Feb 2019 22:21:10 +0000 (23:21 +0100)
committer Helge Deller <deller@gmx.de>
Sat, 3 Aug 2019 06:56:57 +0000 (08:56 +0200)
Add performance-optimized versions of some string functions.

Signed-off-by: Helge Deller <deller@gmx.de>
Tested-by: Sven Schnelle <svens@stackframe.org>
arch/parisc/include/asm/string.h
arch/parisc/kernel/parisc_ksyms.c
arch/parisc/lib/Makefile
arch/parisc/lib/memset.c [deleted file]
arch/parisc/lib/string.S [new file with mode: 0644]

index f6e1132f4e352d40ab5c5454f3be06aa016b6211..4a0c9dbd62fd0cf56744490d069697f7062c9aa7 100644 (file)
@@ -8,4 +8,19 @@ extern void * memset(void *, int, size_t);
 #define __HAVE_ARCH_MEMCPY
 void * memcpy(void * dest,const void *src,size_t count);
 
+#define __HAVE_ARCH_STRLEN
+extern size_t strlen(const char *s);   /* defined in arch/parisc/lib/string.S */
+
+#define __HAVE_ARCH_STRCPY
+extern char *strcpy(char *dest, const char *src);      /* defined in arch/parisc/lib/string.S */
+
+#define __HAVE_ARCH_STRNCPY
+extern char *strncpy(char *dest, const char *src, size_t count);       /* defined in arch/parisc/lib/string.S */
+
+#define __HAVE_ARCH_STRCAT
+extern char *strcat(char *dest, const char *src);      /* defined in arch/parisc/lib/string.S */
+
+#define __HAVE_ARCH_MEMSET     /* NOTE(review): memset is already declared above this hunk (see hunk context) - this repeat looks redundant, confirm */
+extern void *memset(void *, int, size_t);      /* now defined in arch/parisc/lib/string.S (memset.c is deleted) */
+
 #endif
index e8a6a751dfd8eec60f337035d36feef0afe0007f..8ed409ecec9335e186a5d2684e3e10ad79facba4 100644 (file)
 
 #include <linux/string.h>
 EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(strlen); /* strlen/strcpy/strncpy/strcat are the routines defined in arch/parisc/lib/string.S */
+EXPORT_SYMBOL(strcpy);
+EXPORT_SYMBOL(strncpy);
+EXPORT_SYMBOL(strcat);
 
 #include <linux/atomic.h>
 EXPORT_SYMBOL(__xchg8);
index 7b197667faf6cda53fa8ccafdd0e3ebddf3c22f3..2d7a9974dbaef3cdd84be3daf739589f18f8376a 100644 (file)
@@ -3,7 +3,7 @@
 # Makefile for parisc-specific library files
 #
 
-lib-y  := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \
-          ucmpdi2.o delay.o
+lib-y  := lusercopy.o bitops.o checksum.o io.o memcpy.o \
+          ucmpdi2.o delay.o string.o
 
 obj-y  := iomap.o
diff --git a/arch/parisc/lib/memset.c b/arch/parisc/lib/memset.c
deleted file mode 100644 (file)
index 1d7929b..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-/* Slight modifications for pa-risc linux - Paul Bame <bame@debian.org> */
-
-#include <linux/types.h>
-#include <asm/string.h>
-
-#define OPSIZ (BITS_PER_LONG/8)
-typedef unsigned long op_t;
-
-void *
-memset (void *dstpp, int sc, size_t len)
-{
-  unsigned int c = sc;
-  long int dstp = (long int) dstpp;
-
-  if (len >= 8)
-    {
-      size_t xlen;
-      op_t cccc;
-
-      cccc = (unsigned char) c;
-      cccc |= cccc << 8;
-      cccc |= cccc << 16;
-      if (OPSIZ > 4)
-       /* Do the shift in two steps to avoid warning if long has 32 bits.  */
-       cccc |= (cccc << 16) << 16;
-
-      /* There are at least some bytes to set.
-        No need to test for LEN == 0 in this alignment loop.  */
-      while (dstp % OPSIZ != 0)
-       {
-         ((unsigned char *) dstp)[0] = c;
-         dstp += 1;
-         len -= 1;
-       }
-
-      /* Write 8 `op_t' per iteration until less than 8 `op_t' remain.  */
-      xlen = len / (OPSIZ * 8);
-      while (xlen > 0)
-       {
-         ((op_t *) dstp)[0] = cccc;
-         ((op_t *) dstp)[1] = cccc;
-         ((op_t *) dstp)[2] = cccc;
-         ((op_t *) dstp)[3] = cccc;
-         ((op_t *) dstp)[4] = cccc;
-         ((op_t *) dstp)[5] = cccc;
-         ((op_t *) dstp)[6] = cccc;
-         ((op_t *) dstp)[7] = cccc;
-         dstp += 8 * OPSIZ;
-         xlen -= 1;
-       }
-      len %= OPSIZ * 8;
-
-      /* Write 1 `op_t' per iteration until less than OPSIZ bytes remain.  */
-      xlen = len / OPSIZ;
-      while (xlen > 0)
-       {
-         ((op_t *) dstp)[0] = cccc;
-         dstp += OPSIZ;
-         xlen -= 1;
-       }
-      len %= OPSIZ;
-    }
-
-  /* Write the last few bytes.  */
-  while (len > 0)
-    {
-      ((unsigned char *) dstp)[0] = c;
-      dstp += 1;
-      len -= 1;
-    }
-
-  return dstpp;
-}
diff --git a/arch/parisc/lib/string.S b/arch/parisc/lib/string.S
new file mode 100644 (file)
index 0000000..4a64264
--- /dev/null
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    PA-RISC assembly string functions
+ *
+ *    Copyright (C) 2019 Helge Deller <deller@gmx.de>
+ */
+
+#include <asm/assembly.h>
+#include <linux/linkage.h>
+
+       .section .text.hot
+       .level PA_ASM_LEVEL
+
+       t0 = r20
+       t1 = r21
+       t2 = r22
+
+ENTRY_CFI(strlen, frame=0,no_calls)    /* size_t strlen(const char *s): arg0 = s, result in ret0; scans one 32-bit word per iteration */
+       or,COND(<>) arg0,r0,ret0        /* ret0 = s; skip the next insn when s is non-NULL */
+       b,l,n   .Lstrlen_null_ptr,r0    /* s == NULL: return with ret0 = 0 */
+       depwi   0,31,2,ret0             /* clear the low two bits: ret0 = s rounded down to a word boundary */
+       cmpb,COND(<>) arg0,ret0,.Lstrlen_not_aligned   /* s was not word aligned */
+       ldw,ma  4(ret0),t0              /* delay slot (runs on both paths): t0 = first word, ret0 += 4 */
+       cmpib,tr 0,r0,.Lstrlen_loop     /* unconditional branch into the loop */
+       uxor,nbz r0,t0,r0               /* delay slot: skip the loop-exit branch if no byte of t0 is zero */
+.Lstrlen_not_aligned:
+       uaddcm  arg0,ret0,t1            /* t1 = s + ~ret0, i.e. s - ret0 - 1 */
+       shladd  t1,3,r0,t1              /* t1 *= 8: byte distance converted to a bit count */
+       mtsar   t1                      /* shift-amount register = that bit position */
+       depwi   -1,%sar,32,t0           /* intent: deposit ones over the bytes that precede s so they cannot look like a NUL */
+       uxor,nbz r0,t0,r0               /* skip the loop-exit branch if no byte of t0 is zero */
+.Lstrlen_loop:
+       b,l,n   .Lstrlen_end_loop,r0    /* only executed when the preceding uxor found a zero byte */
+       ldw,ma  4(ret0),t0              /* t0 = next word, ret0 += 4 */
+       cmpib,tr 0,r0,.Lstrlen_loop     /* always loop; exit is decided by the uxor in the delay slot */
+       uxor,nbz r0,t0,r0               /* delay slot: nullify the exit branch above if no byte is zero */
+.Lstrlen_end_loop:                     /* ret0 is 4 past the word holding the NUL; locate the NUL inside t0 */
+       extrw,u,<> t0,7,8,r0            /* bits 0-7 (first byte of the word): skip next insn if non-zero */
+       addib,tr,n -3,ret0,.Lstrlen_out /* NUL is the first byte */
+       extrw,u,<> t0,15,8,r0           /* bits 8-15 (second byte): skip next insn if non-zero */
+       addib,tr,n -2,ret0,.Lstrlen_out /* NUL is the second byte */
+       extrw,u,<> t0,23,8,r0           /* bits 16-23 (third byte): skip next insn if non-zero */
+       addi    -1,ret0,ret0            /* NUL is the third byte; otherwise it is the fourth and ret0 is left as is */
+.Lstrlen_out:
+       bv r0(rp)                       /* return */
+       uaddcm ret0,arg0,ret0           /* delay slot: ret0 = ret0 + ~s = ret0 - s - 1 = length */
+.Lstrlen_null_ptr:
+       bv,n r0(rp)                     /* return; ret0 is still 0 from the initial or */
+ENDPROC_CFI(strlen)
+
+
+ENTRY_CFI(strcpy, frame=0,no_calls)    /* char *strcpy(char *dest, const char *src): arg0 = dest, arg1 = src, returns dest in ret0; byte-wise copy */
+       ldb     0(arg1),t0              /* t0 = src[0] */
+       stb     t0,0(arg0)              /* dest[0] = t0 */
+       ldo     0(arg0),ret0            /* return value = dest */
+       ldo     1(arg1),t1              /* t1 = read cursor = src + 1 */
+       cmpb,=  r0,t0,2f                /* first byte was NUL: done */
+       ldo     1(arg0),t2              /* delay slot: t2 = write cursor = dest + 1 */
+1:     ldb     0(t1),arg1              /* arg1 is reused as the scratch byte register */
+       stb     arg1,0(t2)              /* copy the byte, including the terminating NUL */
+       ldo     1(t1),t1                /* advance read cursor */
+       cmpb,<> r0,arg1,1b              /* keep looping until the byte just copied was NUL */
+       ldo     1(t2),t2                /* delay slot: advance write cursor */
+2:     bv,n    r0(rp)                  /* return */
+ENDPROC_CFI(strcpy)
+
+
+/*
+ * char *strncpy(char *dest, const char *src, size_t count)
+ * Writes exactly count bytes to dest: src up to its NUL, then zero padding.
+ */
+ENTRY_CFI(strncpy, frame=0,no_calls)
+       cmpb,COND(=) r0,arg2,2f         /* count == 0: store nothing at all */
+       ldo     0(arg0),ret0            /* delay slot (both paths): return value = dest */
+1:     ldb     0(arg1),t0              /* t0 = current source byte */
+       cmpiclr,= 0,t0,r0               /* at the NUL: skip src++, so the NUL is re-read and pads the rest */
+       ldo     1(arg1),arg1            /* src++ (only while the byte is non-zero) */
+       stb     t0,0(arg0)              /* *dest = t0 */
+       ldo     -1(arg2),arg2           /* count-- */
+       cmpb,COND(<>) r0,arg2,1b        /* loop until exactly count bytes have been written */
+       ldo     1(arg0),arg0            /* delay slot: dest++ */
+2:     bv,n    r0(rp)                  /* return */
+ENDPROC_CFI(strncpy)
+
+
+ENTRY_CFI(strcat, frame=0,no_calls)    /* char *strcat(char *dest, const char *src): arg0 = dest, arg1 = src, returns dest in ret0 */
+       ldb     0(arg0),t0              /* t0 = dest[0] */
+       cmpb,=  t0,r0,2f                /* dest is empty: start appending at dest itself */
+       ldo     0(arg0),ret0            /* delay slot (both paths): return value = dest */
+       ldo     1(arg0),arg0            /* arg0 = dest + 1 */
+1:     ldb     0(arg0),t1              /* scan for the end of dest */
+       cmpb,<>,n r0,t1,1b              /* not NUL yet: keep scanning */
+       ldo     1(arg0),arg0            /* delay slot: advance; nullified on loop exit so arg0 stays on the NUL */
+2:     ldb     0(arg1),t2              /* t2 = next source byte */
+       stb     t2,0(arg0)              /* append it (the terminating NUL is copied too) */
+       ldo     1(arg0),arg0            /* advance write cursor */
+       ldb     0(arg1),t0              /* NOTE(review): reloads the byte already held in t2 - looks redundant */
+       cmpb,<> r0,t0,2b                /* loop until the byte just copied was NUL */
+       ldo     1(arg1),arg1            /* delay slot: advance read cursor */
+       bv,n    r0(rp)                  /* return */
+ENDPROC_CFI(strcat)
+
+
+ENTRY_CFI(memset, frame=0,no_calls)    /* void *memset(void *s, int c, size_t n): arg0 = s, arg1 = c, arg2 = n, returns s in ret0; all stores are byte stores */
+       copy    arg0,ret0               /* return value = s */
+       cmpb,COND(=) r0,arg0,4f         /* s == NULL: return without storing */
+       copy    arg0,t2                 /* delay slot: t2 = write cursor */
+       cmpb,COND(=) r0,arg2,4f         /* n == 0: return without storing */
+       ldo     -1(arg2),arg3           /* delay slot: arg3 = n - 1 */
+       subi    -1,arg3,t0              /* t0 = -1 - arg3 = -n */
+       subi    0,t0,t1                 /* t1 = 0 - t0 = n */
+       cmpiclr,COND(>=) 0,t1,arg2      /* arg2 = 0; skip the next insn when 0 >= t1 (signed compare) */
+       ldo     -1(t1),arg2             /* arg2 = n - 1 */
+       extru arg2,31,2,arg0            /* arg0 = low two bits of arg2 = (n - 1) & 3 */
+2:     stb     arg1,0(t2)              /* head loop: single byte stores, runs arg0 + 1 times */
+       ldo     1(t2),t2                /* advance write cursor */
+       addib,>= -1,arg0,2b             /* arg0--; loop while the result is >= 0 */
+       ldo     -1(arg3),arg3           /* delay slot; arg3 is not read again in the visible code */
+       cmpiclr,COND(<=) 4,arg2,r0      /* skip the exit branch when 4 <= arg2 */
+       b,l,n   4f,r0                   /* fewer than four bytes were left for the unrolled loop: done */
+#ifdef CONFIG_64BIT
+       depd,*  r0,63,2,arg2            /* clear the low two bits: arg2 = remaining count, a multiple of 4 */
+#else
+       depw    r0,31,2,arg2            /* clear the low two bits: arg2 = remaining count, a multiple of 4 */
+#endif
+       ldo     1(t2),t2                /* bias the cursor by one; the unrolled stores use offsets -1..2 */
+3:     stb     arg1,-1(t2)             /* unrolled loop: four byte stores per iteration */
+       stb     arg1,0(t2)
+       stb     arg1,1(t2)
+       stb     arg1,2(t2)
+       addib,COND(>) -4,arg2,3b        /* arg2 -= 4; loop while the result is > 0 */
+       ldo     4(t2),t2                /* delay slot: advance the cursor by four */
+4:     bv,n    r0(rp)                  /* return */
+ENDPROC_CFI(memset)
+
+       .end