1 From 857117cae13c214c709931c5f84e67249c7a3c81 Mon Sep 17 00:00:00 2001
2 From: popcornmix <popcornmix@gmail.com>
3 Date: Mon, 28 Nov 2016 16:50:04 +0000
4 Subject: [PATCH] Improve __copy_to_user and __copy_from_user
7 Provide a __copy_from_user that uses memcpy. On BCM2708, use
8 optimised memcpy/memmove/memcmp/memset implementations.
10 arch/arm: Add mmiocpy/set aliases for memcpy/set
12 See: https://github.com/raspberrypi/linux/issues/1082
14 copy_from_user: CPU_SW_DOMAIN_PAN compatibility
16 The downstream copy_from_user acceleration must also play nice with
17 CONFIG_CPU_SW_DOMAIN_PAN.
19 See: https://github.com/raspberrypi/linux/issues/1381
21 Signed-off-by: Phil Elwell <phil@raspberrypi.org>
23 arch/arm/include/asm/string.h | 5 +
24 arch/arm/include/asm/uaccess.h | 3 +
25 arch/arm/lib/Makefile | 14 +-
26 arch/arm/lib/arm-mem.h | 159 +++++++++
27 arch/arm/lib/copy_from_user.S | 4 +-
28 arch/arm/lib/exports_rpi.c | 37 +++
29 arch/arm/lib/memcmp_rpi.S | 285 ++++++++++++++++
30 arch/arm/lib/memcpy_rpi.S | 61 ++++
31 arch/arm/lib/memcpymove.h | 506 +++++++++++++++++++++++++++++
32 arch/arm/lib/memmove_rpi.S | 61 ++++
33 arch/arm/lib/memset_rpi.S | 128 ++++++++
34 arch/arm/lib/uaccess_with_memcpy.c | 120 ++++++-
35 arch/arm/mach-bcm/Kconfig | 7 +
36 13 files changed, 1385 insertions(+), 5 deletions(-)
37 create mode 100644 arch/arm/lib/arm-mem.h
38 create mode 100644 arch/arm/lib/exports_rpi.c
39 create mode 100644 arch/arm/lib/memcmp_rpi.S
40 create mode 100644 arch/arm/lib/memcpy_rpi.S
41 create mode 100644 arch/arm/lib/memcpymove.h
42 create mode 100644 arch/arm/lib/memmove_rpi.S
43 create mode 100644 arch/arm/lib/memset_rpi.S
45 --- a/arch/arm/include/asm/string.h
46 +++ b/arch/arm/include/asm/string.h
47 @@ -39,4 +39,9 @@ static inline void *memset64(uint64_t *p
48 return __memset64(p, v, n * 8, v >> 32);
51 +#ifdef CONFIG_BCM2835_FAST_MEMCPY
52 +#define __HAVE_ARCH_MEMCMP
53 +extern int memcmp(const void *, const void *, size_t);
57 --- a/arch/arm/include/asm/uaccess.h
58 +++ b/arch/arm/include/asm/uaccess.h
59 @@ -512,6 +512,9 @@ do { \
60 extern unsigned long __must_check
61 arm_copy_from_user(void *to, const void __user *from, unsigned long n);
63 +extern unsigned long __must_check
64 +__copy_from_user_std(void *to, const void __user *from, unsigned long n);
66 static inline unsigned long __must_check
67 raw_copy_from_user(void *to, const void __user *from, unsigned long n)
69 --- a/arch/arm/lib/Makefile
70 +++ b/arch/arm/lib/Makefile
73 lib-y := changebit.o csumipv6.o csumpartial.o \
74 csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
75 - delay.o delay-loop.o findbit.o memchr.o memcpy.o \
76 - memmove.o memset.o setbit.o \
77 + delay.o delay-loop.o findbit.o memchr.o \
80 testchangebit.o testclearbit.o testsetbit.o \
81 ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
82 @@ -25,6 +25,16 @@ else
86 +# Choose optimised implementations for Raspberry Pi
87 +ifeq ($(CONFIG_BCM2835_FAST_MEMCPY),y)
88 + CFLAGS_uaccess_with_memcpy.o += -DCOPY_FROM_USER_THRESHOLD=1600
89 + CFLAGS_uaccess_with_memcpy.o += -DCOPY_TO_USER_THRESHOLD=672
90 + obj-$(CONFIG_MODULES) += exports_rpi.o
91 + lib-y += memcpy_rpi.o memmove_rpi.o memset_rpi.o memcmp_rpi.o
93 + lib-y += memcpy.o memmove.o memset.o
96 # using lib_ here won't override already available weak symbols
97 obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o
100 +++ b/arch/arm/lib/arm-mem.h
103 +Copyright (c) 2013, Raspberry Pi Foundation
104 +Copyright (c) 2013, RISC OS Open Ltd
105 +All rights reserved.
107 +Redistribution and use in source and binary forms, with or without
108 +modification, are permitted provided that the following conditions are met:
109 + * Redistributions of source code must retain the above copyright
110 + notice, this list of conditions and the following disclaimer.
111 + * Redistributions in binary form must reproduce the above copyright
112 + notice, this list of conditions and the following disclaimer in the
113 + documentation and/or other materials provided with the distribution.
114 + * Neither the name of the copyright holder nor the
115 + names of its contributors may be used to endorse or promote products
116 + derived from this software without specific prior written permission.
118 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
119 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
120 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
121 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
122 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
123 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
124 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
125 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
126 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
127 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
136 +.macro preload_leading_step1 backwards, ptr, base
137 +/* If the destination is already 16-byte aligned, then we need to preload
138 + * between 0 and prefetch_distance (inclusive) cache lines ahead so there
139 + * are no gaps when the inner loop starts.
148 + .rept prefetch_distance+1
151 + .set OFFSET, OFFSET-32
153 + .set OFFSET, OFFSET+32
158 +.macro preload_leading_step2 backwards, ptr, base, leading_bytes, tmp
159 +/* However, if the destination is not 16-byte aligned, we may need to
160 + * preload one more cache line than that. The question we need to ask is:
161 + * are the leading bytes more than the amount by which the source
162 + * pointer will be rounded down for preloading, and if so, by how many
166 +/* Here we compare against how many bytes we are into the
167 + * cache line, counting down from the highest such address.
168 + * Effectively, we want to calculate
169 + * leading_bytes = dst&15
170 + * cacheline_offset = 31-((src-leading_bytes-1)&31)
171 + * extra_needed = leading_bytes - cacheline_offset
172 + * and test if extra_needed is <= 0, or rearranging:
173 + * leading_bytes + (src-leading_bytes-1)&31 <= 31
175 + mov tmp, base, lsl #32-5
176 + sbc tmp, tmp, leading_bytes, lsl #32-5
177 + adds tmp, tmp, leading_bytes, lsl #32-5
179 + pld [ptr, #-32*(prefetch_distance+1)]
181 +/* Effectively, we want to calculate
182 + * leading_bytes = (-dst)&15
183 + * cacheline_offset = (src+leading_bytes)&31
184 + * extra_needed = leading_bytes - cacheline_offset
185 + * and test if extra_needed is <= 0.
187 + mov tmp, base, lsl #32-5
188 + add tmp, tmp, leading_bytes, lsl #32-5
189 + rsbs tmp, tmp, leading_bytes, lsl #32-5
191 + pld [ptr, #32*(prefetch_distance+1)]
196 +.macro preload_trailing backwards, base, remain, tmp
197 + /* We need either 0, 1 or 2 extra preloads */
200 + mov tmp, tmp, lsl #32-5
202 + mov tmp, base, lsl #32-5
204 + adds tmp, tmp, remain, lsl #32-5
205 + adceqs tmp, tmp, #0
206 + /* The instruction above has two effects: it ensures Z is only
207 + * set if C was clear (so Z indicates that both shifted quantities
208 + * were 0), and it clears C if Z was set (so C indicates that the sum
209 + * of the shifted quantities was strictly greater than 32) */
219 + pld [tmp, #-32*(prefetch_distance+1)]
221 + pld [tmp, #-32*prefetch_distance]
223 + pld [tmp, #32*(prefetch_distance+2)]
225 + pld [tmp, #32*(prefetch_distance+1)]
230 +.macro preload_all backwards, narrow_case, shift, base, remain, tmp0, tmp1
233 + bic tmp0, tmp0, #31
235 + sub tmp1, base, remain, lsl #shift
237 + bic tmp0, base, #31
239 + add tmp1, base, remain, lsl #shift
242 + bic tmp1, tmp1, #31
246 + /* In this case, all the data fits in either 1 or 2 cache lines */
251 + sub tmp0, tmp0, #32
253 + add tmp0, tmp0, #32
261 --- a/arch/arm/lib/copy_from_user.S
262 +++ b/arch/arm/lib/copy_from_user.S
267 -ENTRY(arm_copy_from_user)
268 +ENTRY(__copy_from_user_std)
269 +WEAK(arm_copy_from_user)
270 #ifdef CONFIG_CPU_SPECTRE
272 ldr r3, [r3, #TI_ADDR_LIMIT]
273 @@ -117,6 +118,7 @@ ENTRY(arm_copy_from_user)
274 #include "copy_template.S"
276 ENDPROC(arm_copy_from_user)
277 +ENDPROC(__copy_from_user_std)
279 .pushsection .fixup,"ax"
282 +++ b/arch/arm/lib/exports_rpi.c
285 + * Copyright (c) 2014, Raspberry Pi (Trading) Ltd.
287 + * Redistribution and use in source and binary forms, with or without
288 + * modification, are permitted provided that the following conditions
290 + * 1. Redistributions of source code must retain the above copyright
291 + * notice, this list of conditions, and the following disclaimer,
292 + * without modification.
293 + * 2. Redistributions in binary form must reproduce the above copyright
294 + * notice, this list of conditions and the following disclaimer in the
295 + * documentation and/or other materials provided with the distribution.
296 + * 3. The names of the above-listed copyright holders may not be used
297 + * to endorse or promote products derived from this software without
298 + * specific prior written permission.
300 + * ALTERNATIVELY, this software may be distributed under the terms of the
301 + * GNU General Public License ("GPL") version 2, as published by the Free
302 + * Software Foundation.
304 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
305 + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
306 + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
307 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
308 + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
309 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
310 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
311 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
312 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
313 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
314 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
317 +#include <linux/kernel.h>
318 +#include <linux/module.h>
320 +EXPORT_SYMBOL(memcmp);
322 +++ b/arch/arm/lib/memcmp_rpi.S
325 +Copyright (c) 2013, Raspberry Pi Foundation
326 +Copyright (c) 2013, RISC OS Open Ltd
327 +All rights reserved.
329 +Redistribution and use in source and binary forms, with or without
330 +modification, are permitted provided that the following conditions are met:
331 + * Redistributions of source code must retain the above copyright
332 + notice, this list of conditions and the following disclaimer.
333 + * Redistributions in binary form must reproduce the above copyright
334 + notice, this list of conditions and the following disclaimer in the
335 + documentation and/or other materials provided with the distribution.
336 + * Neither the name of the copyright holder nor the
337 + names of its contributors may be used to endorse or promote products
338 + derived from this software without specific prior written permission.
340 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
341 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
342 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
343 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
344 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
345 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
346 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
347 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
348 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
349 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
352 +#include <linux/linkage.h>
353 +#include "arm-mem.h"
355 +/* Prevent the stack from becoming executable */
356 +#if defined(__linux__) && defined(__ELF__)
357 +.section .note.GNU-stack,"",%progbits
367 +.macro memcmp_process_head unaligned
369 + ldr DAT0, [S_1], #4
370 + ldr DAT1, [S_1], #4
371 + ldr DAT2, [S_1], #4
372 + ldr DAT3, [S_1], #4
374 + ldmia S_1!, {DAT0, DAT1, DAT2, DAT3}
376 + ldmia S_2!, {DAT4, DAT5, DAT6, DAT7}
379 +.macro memcmp_process_tail
387 +.macro memcmp_leading_31bytes
388 + movs DAT0, OFF, lsl #31
389 + ldrmib DAT0, [S_1], #1
390 + ldrcsh DAT1, [S_1], #2
391 + ldrmib DAT4, [S_2], #1
392 + ldrcsh DAT5, [S_2], #2
402 + movs DAT0, OFF, lsl #29
403 + ldrmi DAT0, [S_1], #4
404 + ldrcs DAT1, [S_1], #4
405 + ldrcs DAT2, [S_1], #4
406 + ldrmi DAT4, [S_2], #4
407 + ldmcsia S_2!, {DAT5, DAT6}
422 + memcmp_process_head 1
424 + memcmp_process_tail
428 +.macro memcmp_trailing_15bytes unaligned
431 + ldrcs DAT0, [S_1], #4
432 + ldrcs DAT1, [S_1], #4
434 + ldmcsia S_1!, {DAT0, DAT1}
436 + ldrmi DAT2, [S_1], #4
437 + ldmcsia S_2!, {DAT4, DAT5}
438 + ldrmi DAT6, [S_2], #4
450 + ldrcsh DAT0, [S_1], #2
452 + ldrcsh DAT4, [S_2], #2
463 +.macro memcmp_long_inner_loop unaligned
465 + memcmp_process_head unaligned
466 + pld [S_2, #prefetch_distance*32 + 16]
467 + memcmp_process_tail
468 + memcmp_process_head unaligned
470 + memcmp_process_tail
473 + /* Just before the final (prefetch_distance+1) 32-byte blocks,
474 + * deal with final preloads */
475 + preload_trailing 0, S_1, N, DAT0
476 + preload_trailing 0, S_2, N, DAT0
477 + add N, N, #(prefetch_distance+2)*32 - 16
479 + memcmp_process_head unaligned
480 + memcmp_process_tail
483 + /* Trailing words and bytes */
486 + memcmp_trailing_15bytes unaligned
487 +199: /* Reached end without detecting a difference */
490 + pop {DAT1-DAT6, pc}
493 +.macro memcmp_short_inner_loop unaligned
494 + subs N, N, #16 /* simplifies inner loop termination */
497 + memcmp_process_head unaligned
498 + memcmp_process_tail
501 +122: /* Trailing words and bytes */
504 + memcmp_trailing_15bytes unaligned
505 +199: /* Reached end without detecting a difference */
508 + pop {DAT1-DAT6, pc}
512 + * int memcmp(const void *s1, const void *s2, size_t n);
514 + * a1 = pointer to buffer 1
515 + * a2 = pointer to buffer 2
516 + * a3 = number of bytes to compare (as unsigned chars)
518 + * a1 = >0/=0/<0 if s1 >/=/< s2
521 +.set prefetch_distance, 2
537 + push {DAT1-DAT6, lr}
538 + setend be /* lowest-addressed bytes are most significant */
540 + /* To preload ahead as we go, we need at least (prefetch_distance+2) 32-byte blocks */
541 + cmp N, #(prefetch_distance+3)*32 - 1
545 + /* Adjust N so that the decrement instruction can also test for
546 + * inner loop termination. We want it to stop when there are
547 + * (prefetch_distance+1) complete blocks to go. */
548 + sub N, N, #(prefetch_distance+2)*32
549 + preload_leading_step1 0, DAT0, S_1
550 + preload_leading_step1 0, DAT1, S_2
553 + rsb OFF, S_2, #0 /* no need to AND with 15 here */
554 + preload_leading_step2 0, DAT0, S_1, OFF, DAT2
555 + preload_leading_step2 0, DAT1, S_2, OFF, DAT2
556 + memcmp_leading_31bytes
557 +154: /* Second source now cacheline (32-byte) aligned; we have at
558 + * least one prefetch to go. */
559 + /* Prefetch offset is best selected such that it lies in the
560 + * first 8 of each 32 bytes - but it's just as easy to aim for
563 + rsb OFF, OFF, #32*prefetch_distance
566 + memcmp_long_inner_loop 0
567 +140: memcmp_long_inner_loop 1
569 +170: /* Short case */
572 + preload_all 0, 0, 0, S_1, N, DAT0, DAT1
573 + preload_all 0, 0, 0, S_2, N, DAT0, DAT1
578 + ldrb DAT0, [S_1], #1
579 + ldrb DAT4, [S_2], #1
584 +174: /* Second source now 4-byte aligned; we have 0 or more bytes to go */
587 + memcmp_short_inner_loop 0
588 +140: memcmp_short_inner_loop 1
590 +200: /* Difference found: determine sign. */
594 + pop {DAT1-DAT6, pc}
610 +++ b/arch/arm/lib/memcpy_rpi.S
613 +Copyright (c) 2013, Raspberry Pi Foundation
614 +Copyright (c) 2013, RISC OS Open Ltd
615 +All rights reserved.
617 +Redistribution and use in source and binary forms, with or without
618 +modification, are permitted provided that the following conditions are met:
619 + * Redistributions of source code must retain the above copyright
620 + notice, this list of conditions and the following disclaimer.
621 + * Redistributions in binary form must reproduce the above copyright
622 + notice, this list of conditions and the following disclaimer in the
623 + documentation and/or other materials provided with the distribution.
624 + * Neither the name of the copyright holder nor the
625 + names of its contributors may be used to endorse or promote products
626 + derived from this software without specific prior written permission.
628 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
629 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
630 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
631 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
632 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
633 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
634 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
635 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
636 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
637 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
640 +#include <linux/linkage.h>
641 +#include "arm-mem.h"
642 +#include "memcpymove.h"
644 +/* Prevent the stack from becoming executable */
645 +#if defined(__linux__) && defined(__ELF__)
646 +.section .note.GNU-stack,"",%progbits
657 + * void *memcpy(void * restrict s1, const void * restrict s2, size_t n);
659 + * a1 = pointer to destination
660 + * a2 = pointer to source
661 + * a3 = number of bytes to copy
666 +.set prefetch_distance, 3
674 +++ b/arch/arm/lib/memcpymove.h
677 +Copyright (c) 2013, Raspberry Pi Foundation
678 +Copyright (c) 2013, RISC OS Open Ltd
679 +All rights reserved.
681 +Redistribution and use in source and binary forms, with or without
682 +modification, are permitted provided that the following conditions are met:
683 + * Redistributions of source code must retain the above copyright
684 + notice, this list of conditions and the following disclaimer.
685 + * Redistributions in binary form must reproduce the above copyright
686 + notice, this list of conditions and the following disclaimer in the
687 + documentation and/or other materials provided with the distribution.
688 + * Neither the name of the copyright holder nor the
689 + names of its contributors may be used to endorse or promote products
690 + derived from this software without specific prior written permission.
692 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
693 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
694 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
695 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
696 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
697 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
698 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
699 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
700 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
701 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
704 +.macro unaligned_words backwards, align, use_pld, words, r0, r1, r2, r3, r4, r5, r6, r7, r8
707 + mov r1, r0, lsl #32-align*8
709 + orr r1, r1, r0, lsr #align*8
712 + mov r0, r1, lsr #align*8
714 + orr r0, r0, r1, lsl #32-align*8
720 + mov r2, r0, lsl #32-align*8
722 + orr r2, r2, r1, lsr #align*8
723 + mov r1, r1, lsl #32-align*8
724 + orr r1, r1, r0, lsr #align*8
728 + mov r0, r2, lsr #align*8
730 + orr r0, r0, r1, lsl #32-align*8
731 + mov r1, r1, lsr #align*8
732 + orr r1, r1, r2, lsl #32-align*8
738 + mov r4, r0, lsl #32-align*8
740 + orr r4, r4, r3, lsr #align*8
741 + mov r3, r3, lsl #32-align*8
742 + orr r3, r3, r2, lsr #align*8
743 + mov r2, r2, lsl #32-align*8
744 + orr r2, r2, r1, lsr #align*8
745 + mov r1, r1, lsl #32-align*8
746 + orr r1, r1, r0, lsr #align*8
747 + stmdb D!, {r1, r2, r3, r4}
750 + mov r0, r4, lsr #align*8
752 + orr r0, r0, r1, lsl #32-align*8
753 + mov r1, r1, lsr #align*8
754 + orr r1, r1, r2, lsl #32-align*8
755 + mov r2, r2, lsr #align*8
756 + orr r2, r2, r3, lsl #32-align*8
757 + mov r3, r3, lsr #align*8
758 + orr r3, r3, r4, lsl #32-align*8
759 + stmia D!, {r0, r1, r2, r3}
763 + ldmdb S!, {r4, r5, r6, r7}
764 + mov r8, r0, lsl #32-align*8
765 + ldmdb S!, {r0, r1, r2, r3}
769 + orr r8, r8, r7, lsr #align*8
770 + mov r7, r7, lsl #32-align*8
771 + orr r7, r7, r6, lsr #align*8
772 + mov r6, r6, lsl #32-align*8
773 + orr r6, r6, r5, lsr #align*8
774 + mov r5, r5, lsl #32-align*8
775 + orr r5, r5, r4, lsr #align*8
776 + mov r4, r4, lsl #32-align*8
777 + orr r4, r4, r3, lsr #align*8
778 + mov r3, r3, lsl #32-align*8
779 + orr r3, r3, r2, lsr #align*8
780 + mov r2, r2, lsl #32-align*8
781 + orr r2, r2, r1, lsr #align*8
782 + mov r1, r1, lsl #32-align*8
783 + orr r1, r1, r0, lsr #align*8
784 + stmdb D!, {r5, r6, r7, r8}
785 + stmdb D!, {r1, r2, r3, r4}
787 + ldmib S!, {r1, r2, r3, r4}
788 + mov r0, r8, lsr #align*8
789 + ldmib S!, {r5, r6, r7, r8}
793 + orr r0, r0, r1, lsl #32-align*8
794 + mov r1, r1, lsr #align*8
795 + orr r1, r1, r2, lsl #32-align*8
796 + mov r2, r2, lsr #align*8
797 + orr r2, r2, r3, lsl #32-align*8
798 + mov r3, r3, lsr #align*8
799 + orr r3, r3, r4, lsl #32-align*8
800 + mov r4, r4, lsr #align*8
801 + orr r4, r4, r5, lsl #32-align*8
802 + mov r5, r5, lsr #align*8
803 + orr r5, r5, r6, lsl #32-align*8
804 + mov r6, r6, lsr #align*8
805 + orr r6, r6, r7, lsl #32-align*8
806 + mov r7, r7, lsr #align*8
807 + orr r7, r7, r8, lsl #32-align*8
808 + stmia D!, {r0, r1, r2, r3}
809 + stmia D!, {r4, r5, r6, r7}
814 +.macro memcpy_leading_15bytes backwards, align
815 + movs DAT1, DAT2, lsl #31
818 + ldrmib DAT0, [S, #-1]!
819 + ldrcsh DAT1, [S, #-2]!
820 + strmib DAT0, [D, #-1]!
821 + strcsh DAT1, [D, #-2]!
823 + ldrmib DAT0, [S], #1
824 + ldrcsh DAT1, [S], #2
825 + strmib DAT0, [D], #1
826 + strcsh DAT1, [D], #2
828 + movs DAT1, DAT2, lsl #29
830 + ldrmi DAT0, [S, #-4]!
832 + ldmcsdb S!, {DAT1, DAT2}
834 + ldrcs DAT2, [S, #-4]!
835 + ldrcs DAT1, [S, #-4]!
837 + strmi DAT0, [D, #-4]!
838 + stmcsdb D!, {DAT1, DAT2}
840 + ldrmi DAT0, [S], #4
842 + ldmcsia S!, {DAT1, DAT2}
844 + ldrcs DAT1, [S], #4
845 + ldrcs DAT2, [S], #4
847 + strmi DAT0, [D], #4
848 + stmcsia D!, {DAT1, DAT2}
852 +.macro memcpy_trailing_15bytes backwards, align
856 + ldmcsdb S!, {DAT0, DAT1}
858 + ldrcs DAT1, [S, #-4]!
859 + ldrcs DAT0, [S, #-4]!
861 + ldrmi DAT2, [S, #-4]!
862 + stmcsdb D!, {DAT0, DAT1}
863 + strmi DAT2, [D, #-4]!
866 + ldmcsia S!, {DAT0, DAT1}
868 + ldrcs DAT0, [S], #4
869 + ldrcs DAT1, [S], #4
871 + ldrmi DAT2, [S], #4
872 + stmcsia D!, {DAT0, DAT1}
873 + strmi DAT2, [D], #4
877 + ldrcsh DAT0, [S, #-2]!
878 + ldrmib DAT1, [S, #-1]
879 + strcsh DAT0, [D, #-2]!
880 + strmib DAT1, [D, #-1]
882 + ldrcsh DAT0, [S], #2
884 + strcsh DAT0, [D], #2
889 +.macro memcpy_long_inner_loop backwards, align
892 + ldr DAT0, [S, #-align]!
894 + ldr LAST, [S, #-align]!
900 + ldmdb S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
902 + stmdb D!, {DAT4, DAT5, DAT6, LAST}
903 + stmdb D!, {DAT0, DAT1, DAT2, DAT3}
905 + ldmia S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
907 + stmia D!, {DAT0, DAT1, DAT2, DAT3}
908 + stmia D!, {DAT4, DAT5, DAT6, LAST}
911 + unaligned_words backwards, align, 1, 8, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7, LAST
915 + /* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */
916 + preload_trailing backwards, S, N, OFF
917 + add N, N, #(prefetch_distance+2)*32 - 32
921 + ldmdb S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
922 + stmdb D!, {DAT4, DAT5, DAT6, LAST}
923 + stmdb D!, {DAT0, DAT1, DAT2, DAT3}
925 + ldmia S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
926 + stmia D!, {DAT0, DAT1, DAT2, DAT3}
927 + stmia D!, {DAT4, DAT5, DAT6, LAST}
930 + unaligned_words backwards, align, 0, 8, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7, LAST
937 + ldmnedb S!, {DAT0, DAT1, DAT2, LAST}
938 + stmnedb D!, {DAT0, DAT1, DAT2, LAST}
940 + ldmneia S!, {DAT0, DAT1, DAT2, LAST}
941 + stmneia D!, {DAT0, DAT1, DAT2, LAST}
945 + unaligned_words backwards, align, 0, 4, DAT0, DAT1, DAT2, DAT3, LAST
948 + /* Trailing words and bytes */
954 + memcpy_trailing_15bytes backwards, align
956 + pop {DAT3, DAT4, DAT5, DAT6, DAT7}
957 + pop {D, DAT1, DAT2, pc}
960 +.macro memcpy_medium_inner_loop backwards, align
964 + ldmdb S!, {DAT0, DAT1, DAT2, LAST}
966 + ldr LAST, [S, #-4]!
967 + ldr DAT2, [S, #-4]!
968 + ldr DAT1, [S, #-4]!
969 + ldr DAT0, [S, #-4]!
971 + stmdb D!, {DAT0, DAT1, DAT2, LAST}
974 + ldmia S!, {DAT0, DAT1, DAT2, LAST}
981 + stmia D!, {DAT0, DAT1, DAT2, LAST}
985 + /* Trailing words and bytes */
988 + memcpy_trailing_15bytes backwards, align
990 + pop {D, DAT1, DAT2, pc}
993 +.macro memcpy_short_inner_loop backwards, align
997 + ldmnedb S!, {DAT0, DAT1, DAT2, LAST}
999 + ldrne LAST, [S, #-4]!
1000 + ldrne DAT2, [S, #-4]!
1001 + ldrne DAT1, [S, #-4]!
1002 + ldrne DAT0, [S, #-4]!
1004 + stmnedb D!, {DAT0, DAT1, DAT2, LAST}
1007 + ldmneia S!, {DAT0, DAT1, DAT2, LAST}
1009 + ldrne DAT0, [S], #4
1010 + ldrne DAT1, [S], #4
1011 + ldrne DAT2, [S], #4
1012 + ldrne LAST, [S], #4
1014 + stmneia D!, {DAT0, DAT1, DAT2, LAST}
1016 + memcpy_trailing_15bytes backwards, align
1018 + pop {D, DAT1, DAT2, pc}
1021 +.macro memcpy backwards
1038 + push {D, DAT1, DAT2, lr}
1040 + .cfi_def_cfa_offset 16
1041 + .cfi_rel_offset D, 0
1044 + .cfi_undefined DAT0
1045 + .cfi_rel_offset DAT1, 4
1046 + .cfi_rel_offset DAT2, 8
1047 + .cfi_undefined LAST
1048 + .cfi_rel_offset lr, 12
1055 + /* See if we're guaranteed to have at least one 16-byte aligned 16-byte write */
1058 + /* To preload ahead as we go, we need at least (prefetch_distance+2) 32-byte blocks */
1059 + cmp N, #(prefetch_distance+3)*32 - 1
1063 + push {DAT3, DAT4, DAT5, DAT6, DAT7}
1065 + .cfi_def_cfa_offset 36
1066 + .cfi_rel_offset D, 20
1067 + .cfi_rel_offset DAT1, 24
1068 + .cfi_rel_offset DAT2, 28
1069 + .cfi_rel_offset DAT3, 0
1070 + .cfi_rel_offset DAT4, 4
1071 + .cfi_rel_offset DAT5, 8
1072 + .cfi_rel_offset DAT6, 12
1073 + .cfi_rel_offset DAT7, 16
1074 + .cfi_rel_offset lr, 32
1076 + /* Adjust N so that the decrement instruction can also test for
1077 + * inner loop termination. We want it to stop when there are
1078 + * (prefetch_distance+1) complete blocks to go. */
1079 + sub N, N, #(prefetch_distance+2)*32
1080 + preload_leading_step1 backwards, DAT0, S
1082 + /* Bug in GAS: it accepts, but mis-assembles the instruction
1083 + * ands DAT2, D, #60, 2
1084 + * which sets DAT2 to the number of leading bytes until destination is aligned and also clears C (sets borrow)
1091 + rsb DAT2, DAT2, #16 /* number of leading bytes until destination aligned */
1093 + preload_leading_step2 backwards, DAT0, S, DAT2, OFF
1094 + memcpy_leading_15bytes backwards, 1
1095 +154: /* Destination now 16-byte aligned; we have at least one prefetch as well as at least one 16-byte output block */
1096 + /* Prefetch offset is best selected such that it lies in the first 8 of each 32 bytes - but it's just as easy to aim for the first one */
1100 + sub OFF, OFF, #32*(prefetch_distance+1)
1103 + rsb OFF, OFF, #32*prefetch_distance
1105 + movs DAT0, S, lsl #31
1109 + memcpy_long_inner_loop backwards, 0
1110 +155: memcpy_long_inner_loop backwards, 1
1111 +156: memcpy_long_inner_loop backwards, 2
1112 +157: memcpy_long_inner_loop backwards, 3
1114 + .cfi_def_cfa_offset 16
1115 + .cfi_rel_offset D, 0
1116 + .cfi_rel_offset DAT1, 4
1117 + .cfi_rel_offset DAT2, 8
1118 + .cfi_same_value DAT3
1119 + .cfi_same_value DAT4
1120 + .cfi_same_value DAT5
1121 + .cfi_same_value DAT6
1122 + .cfi_same_value DAT7
1123 + .cfi_rel_offset lr, 12
1125 +160: /* Medium case */
1126 + preload_all backwards, 0, 0, S, N, DAT2, OFF
1127 + sub N, N, #16 /* simplifies inner loop termination */
1134 + rsb DAT2, DAT2, #16
1136 + memcpy_leading_15bytes backwards, align
1137 +164: /* Destination now 16-byte aligned; we have at least one 16-byte output block */
1140 + memcpy_medium_inner_loop backwards, 0
1141 +140: memcpy_medium_inner_loop backwards, 1
1143 +170: /* Short case, less than 31 bytes, so no guarantee of at least one 16-byte block */
1146 + preload_all backwards, 1, 0, S, N, DAT2, LAST
1152 + ldrb DAT0, [S, #-1]!
1153 + strb DAT0, [D, #-1]!
1155 + ldrb DAT0, [S], #1
1156 + strb DAT0, [D], #1
1160 +174: /* Destination now 4-byte aligned; we have 0 or more output bytes to go */
1163 + memcpy_short_inner_loop backwards, 0
1164 +140: memcpy_short_inner_loop backwards, 1
1183 +++ b/arch/arm/lib/memmove_rpi.S
1186 +Copyright (c) 2013, Raspberry Pi Foundation
1187 +Copyright (c) 2013, RISC OS Open Ltd
1188 +All rights reserved.
1190 +Redistribution and use in source and binary forms, with or without
1191 +modification, are permitted provided that the following conditions are met:
1192 + * Redistributions of source code must retain the above copyright
1193 + notice, this list of conditions and the following disclaimer.
1194 + * Redistributions in binary form must reproduce the above copyright
1195 + notice, this list of conditions and the following disclaimer in the
1196 + documentation and/or other materials provided with the distribution.
1197 + * Neither the name of the copyright holder nor the
1198 + names of its contributors may be used to endorse or promote products
1199 + derived from this software without specific prior written permission.
1201 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
1202 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1203 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1204 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
1205 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1206 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
1207 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
1208 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1209 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
1210 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1213 +#include <linux/linkage.h>
1214 +#include "arm-mem.h"
1215 +#include "memcpymove.h"
1217 +/* Prevent the stack from becoming executable */
1218 +#if defined(__linux__) && defined(__ELF__)
1219 +.section .note.GNU-stack,"",%progbits
1224 + .object_arch armv4
1230 + * void *memmove(void *s1, const void *s2, size_t n);
1232 + * a1 = pointer to destination
1233 + * a2 = pointer to source
1234 + * a3 = number of bytes to copy
1239 +.set prefetch_distance, 3
1243 + bpl memcpy /* pl works even over -1 - 0 and 0x7fffffff - 0x80000000 boundaries */
1247 +++ b/arch/arm/lib/memset_rpi.S
1250 +Copyright (c) 2013, Raspberry Pi Foundation
1251 +Copyright (c) 2013, RISC OS Open Ltd
1252 +All rights reserved.
1254 +Redistribution and use in source and binary forms, with or without
1255 +modification, are permitted provided that the following conditions are met:
1256 + * Redistributions of source code must retain the above copyright
1257 + notice, this list of conditions and the following disclaimer.
1258 + * Redistributions in binary form must reproduce the above copyright
1259 + notice, this list of conditions and the following disclaimer in the
1260 + documentation and/or other materials provided with the distribution.
1261 + * Neither the name of the copyright holder nor the
1262 + names of its contributors may be used to endorse or promote products
1263 + derived from this software without specific prior written permission.
1265 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
1266 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1267 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1268 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
1269 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1270 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
1271 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
1272 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1273 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
1274 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1277 +#include <linux/linkage.h>
1278 +#include "arm-mem.h"
1280 +/* Prevent the stack from becoming executable */
1281 +#if defined(__linux__) && defined(__ELF__)
1282 +.section .note.GNU-stack,"",%progbits
1287 + .object_arch armv4
1293 + * void *memset(void *s, int c, size_t n);
1295 + * a1 = pointer to buffer to fill
1296 + * a2 = byte pattern to fill with (caller-narrowed)
1297 + * a3 = number of bytes to fill
1313 + orr DAT0, DAT0, DAT0, lsl #8
1315 + orr DAT0, DAT0, DAT0, lsl #16
1318 + /* See if we're guaranteed to have at least one 16-byte aligned 16-byte write */
1322 +161: sub N, N, #16 /* simplifies inner loop termination */
1323 + /* Leading words and bytes */
1326 + rsb DAT3, S, #0 /* bits 0-3 = number of leading bytes until aligned */
1327 + movs DAT2, DAT3, lsl #31
1329 + strmib DAT0, [S], #1
1331 + strcsh DAT0, [S], #2
1332 + movs DAT2, DAT3, lsl #29
1334 + strmi DAT0, [S], #4
1336 + stmcsia S!, {DAT0, DAT1}
1337 +164: /* Delayed set up of DAT2 and DAT3 so we could use them as scratch registers above */
1340 + /* Now the inner loop of 16-byte stores */
1341 +165: stmia S!, {DAT0, DAT1, DAT2, DAT3}
1344 +166: /* Trailing words and bytes */
1345 + movs N, N, lsl #29
1346 + stmcsia S!, {DAT0, DAT1}
1347 + strmi DAT0, [S], #4
1349 + strcsh DAT0, [S], #2
1353 +170: /* Short case */
1360 + strb DAT0, [S], #1
1364 + stmneia S!, {DAT0, DAT1, DAT2, DAT3}
1373 +ENDPROC(__memset64)
1374 +ENDPROC(__memset32)
1377 --- a/arch/arm/lib/uaccess_with_memcpy.c
1378 +++ b/arch/arm/lib/uaccess_with_memcpy.c
1380 #include <asm/current.h>
1381 #include <asm/page.h>
1383 +#ifndef COPY_FROM_USER_THRESHOLD
1384 +#define COPY_FROM_USER_THRESHOLD 64
1387 +#ifndef COPY_TO_USER_THRESHOLD
1388 +#define COPY_TO_USER_THRESHOLD 64
1392 pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
1394 @@ -81,7 +89,44 @@ pin_page_for_write(const void __user *_a
1398 -static unsigned long noinline
1400 +pin_page_for_read(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
1402 + unsigned long addr = (unsigned long)_addr;
1409 + pgd = pgd_offset(current->mm, addr);
1410 + if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
1414 + pud = pud_offset(pgd, addr);
1415 + if (unlikely(pud_none(*pud) || pud_bad(*pud)))
1420 + pmd = pmd_offset(pud, addr);
1421 + if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
1424 + pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
1425 + if (unlikely(!pte_present(*pte) || !pte_young(*pte))) {
1426 + pte_unmap_unlock(pte, ptl);
1436 +unsigned long noinline
1437 __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
1439 unsigned long ua_flags;
1440 @@ -134,6 +179,57 @@ out:
1444 +unsigned long noinline
1445 +__copy_from_user_memcpy(void *to, const void __user *from, unsigned long n)
1447 + unsigned long ua_flags;
1450 + if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
1451 + memcpy(to, (const void *)from, n);
1455 + /* the mmap semaphore is taken only if not in an atomic context */
1456 + atomic = in_atomic();
1459 + down_read(&current->mm->mmap_sem);
1465 + while (!pin_page_for_read(from, &pte, &ptl)) {
1468 + up_read(&current->mm->mmap_sem);
1469 + if (__get_user(temp, (char __user *)from))
1472 + down_read(&current->mm->mmap_sem);
1475 + tocopy = (~(unsigned long)from & ~PAGE_MASK) + 1;
1479 + ua_flags = uaccess_save_and_enable();
1480 + memcpy(to, (const void *)from, tocopy);
1481 + uaccess_restore(ua_flags);
1486 + pte_unmap_unlock(pte, ptl);
1489 + up_read(&current->mm->mmap_sem);
1496 arm_copy_to_user(void __user *to, const void *from, unsigned long n)
1498 @@ -144,7 +240,7 @@ arm_copy_to_user(void __user *to, const
1499 * With frame pointer disabled, tail call optimization kicks in
1500 * as well making this test almost invisible.
1503 + if (n < COPY_TO_USER_THRESHOLD) {
1504 unsigned long ua_flags = uaccess_save_and_enable();
1505 n = __copy_to_user_std(to, from, n);
1506 uaccess_restore(ua_flags);
1507 @@ -154,6 +250,26 @@ arm_copy_to_user(void __user *to, const
1512 +unsigned long __must_check
1513 +arm_copy_from_user(void *to, const void __user *from, unsigned long n)
1516 + * This test is stubbed out of the main function above to keep
1517 + * the overhead for small copies low by avoiding a large
1518 + * register dump on the stack just to reload them right away.
1519 + * With frame pointer disabled, tail call optimization kicks in
1520 + * as well making this test almost invisible.
1522 + if (n < COPY_FROM_USER_THRESHOLD) {
1523 + unsigned long ua_flags = uaccess_save_and_enable();
1524 + n = __copy_from_user_std(to, from, n);
1525 + uaccess_restore(ua_flags);
1527 + n = __copy_from_user_memcpy(to, from, n);
1532 static unsigned long noinline
1533 __clear_user_memset(void __user *addr, unsigned long n)
1534 --- a/arch/arm/mach-bcm/Kconfig
1535 +++ b/arch/arm/mach-bcm/Kconfig
1536 @@ -188,6 +188,13 @@ config ARCH_BCM_53573
1537 The base chip is BCM53573 and there are some packaging modifications
1538 like BCM47189 and BCM47452.
1540 +config BCM2835_FAST_MEMCPY
1541 + bool "Enable optimized __copy_to_user and __copy_from_user"
1542 + depends on ARCH_BCM2835 && ARCH_MULTI_V6
1545 + Optimized versions of __copy_to_user and __copy_from_user for Pi1.
1547 config ARCH_BCM_63XX
1548 bool "Broadcom BCM63xx DSL SoC"
1549 depends on ARCH_MULTI_V7