1 From a013e78cd135c415124496edd439b1031102c33c Mon Sep 17 00:00:00 2001
2 From: popcornmix <popcornmix@gmail.com>
3 Date: Mon, 28 Nov 2016 16:50:04 +0000
4 Subject: [PATCH] Improve __copy_to_user and __copy_from_user
7 Provide a __copy_from_user that uses memcpy. On BCM2708, use
8 optimised memcpy/memmove/memcmp/memset implementations.
10 arch/arm: Add mmiocpy/set aliases for memcpy/set
12 See: https://github.com/raspberrypi/linux/issues/1082
14 copy_from_user: CPU_SW_DOMAIN_PAN compatibility
16 The downstream copy_from_user acceleration must also play nice with
17 CONFIG_CPU_SW_DOMAIN_PAN.
19 See: https://github.com/raspberrypi/linux/issues/1381
21 Signed-off-by: Phil Elwell <phil@raspberrypi.org>
23 Fix copy_from_user if BCM2835_FAST_MEMCPY=n
25 The change which introduced CONFIG_BCM2835_FAST_MEMCPY unconditionally
26 changed the behaviour of arm_copy_from_user. The page pinning code
27 is not safe on ARMv7 if LPAE & high memory is enabled and causes
28 crashes which look like PTE corruption.
30 Make __copy_from_user_memcpy conditional on CONFIG_BCM2835_FAST_MEMCPY=y
31 which is really an ARMv6 / Pi1 optimization and not necessary on newer
34 arm: fix mmap unlocks in uaccess_with_memcpy.c
36 This is a regression introduced by commit 192a4e923ef092924dd013e7326f2ec520ee4783 as of rpi-5.8.y, which is when the move to the mmap locking API (d8ed45c5dcd455fc5848d47f86883a1b872ac0d0) took place.
38 The issue is that when the patch to improve performance for the __copy_to_user and __copy_from_user functions was added for the Raspberry Pi, some of the mmap unlocks were incorrectly mapped to write instead of read. This would cause a variety of issues, and in my case, prevent the booting of a squashfs filesystem on rpi-5.8.y and above. An example of the panic you would see from this can be seen at https://pastebin.com/raw/jBz5xCzL
40 Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
41 Signed-off-by: Christopher Blake <chrisrblake93@gmail.com>
43 arch/arm/include/asm/string.h | 5 +
44 arch/arm/include/asm/uaccess.h | 3 +
45 arch/arm/lib/Makefile | 14 +-
46 arch/arm/lib/arm-mem.h | 159 +++++++++
47 arch/arm/lib/copy_from_user.S | 4 +-
48 arch/arm/lib/exports_rpi.c | 37 +++
49 arch/arm/lib/memcmp_rpi.S | 285 ++++++++++++++++
50 arch/arm/lib/memcpy_rpi.S | 61 ++++
51 arch/arm/lib/memcpymove.h | 506 +++++++++++++++++++++++++++++
52 arch/arm/lib/memmove_rpi.S | 61 ++++
53 arch/arm/lib/memset_rpi.S | 128 ++++++++
54 arch/arm/lib/uaccess_with_memcpy.c | 130 +++++++-
55 arch/arm/mach-bcm/Kconfig | 7 +
56 13 files changed, 1394 insertions(+), 6 deletions(-)
57 create mode 100644 arch/arm/lib/arm-mem.h
58 create mode 100644 arch/arm/lib/exports_rpi.c
59 create mode 100644 arch/arm/lib/memcmp_rpi.S
60 create mode 100644 arch/arm/lib/memcpy_rpi.S
61 create mode 100644 arch/arm/lib/memcpymove.h
62 create mode 100644 arch/arm/lib/memmove_rpi.S
63 create mode 100644 arch/arm/lib/memset_rpi.S
65 --- a/arch/arm/include/asm/string.h
66 +++ b/arch/arm/include/asm/string.h
67 @@ -39,4 +39,9 @@ static inline void *memset64(uint64_t *p
68 return __memset64(p, v, n * 8, v >> 32);
71 +#ifdef CONFIG_BCM2835_FAST_MEMCPY
72 +#define __HAVE_ARCH_MEMCMP
73 +extern int memcmp(const void *, const void *, size_t);
77 --- a/arch/arm/include/asm/uaccess.h
78 +++ b/arch/arm/include/asm/uaccess.h
79 @@ -518,6 +518,9 @@ do { \
80 extern unsigned long __must_check
81 arm_copy_from_user(void *to, const void __user *from, unsigned long n);
83 +extern unsigned long __must_check
84 +__copy_from_user_std(void *to, const void __user *from, unsigned long n);
86 static inline unsigned long __must_check
87 raw_copy_from_user(void *to, const void __user *from, unsigned long n)
89 --- a/arch/arm/lib/Makefile
90 +++ b/arch/arm/lib/Makefile
93 lib-y := changebit.o csumipv6.o csumpartial.o \
94 csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
95 - delay.o delay-loop.o findbit.o memchr.o memcpy.o \
96 - memmove.o memset.o setbit.o \
97 + delay.o delay-loop.o findbit.o memchr.o \
100 testchangebit.o testclearbit.o testsetbit.o \
101 ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
102 @@ -25,6 +25,16 @@ else
106 +# Choose optimised implementations for Raspberry Pi
107 +ifeq ($(CONFIG_BCM2835_FAST_MEMCPY),y)
108 + CFLAGS_uaccess_with_memcpy.o += -DCOPY_FROM_USER_THRESHOLD=1600
109 + CFLAGS_uaccess_with_memcpy.o += -DCOPY_TO_USER_THRESHOLD=672
110 + obj-$(CONFIG_MODULES) += exports_rpi.o
111 + lib-y += memcpy_rpi.o memmove_rpi.o memset_rpi.o memcmp_rpi.o
113 + lib-y += memcpy.o memmove.o memset.o
116 # using lib_ here won't override already available weak symbols
117 obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o
120 +++ b/arch/arm/lib/arm-mem.h
123 +Copyright (c) 2013, Raspberry Pi Foundation
124 +Copyright (c) 2013, RISC OS Open Ltd
125 +All rights reserved.
127 +Redistribution and use in source and binary forms, with or without
128 +modification, are permitted provided that the following conditions are met:
129 + * Redistributions of source code must retain the above copyright
130 + notice, this list of conditions and the following disclaimer.
131 + * Redistributions in binary form must reproduce the above copyright
132 + notice, this list of conditions and the following disclaimer in the
133 + documentation and/or other materials provided with the distribution.
134 + * Neither the name of the copyright holder nor the
135 + names of its contributors may be used to endorse or promote products
136 + derived from this software without specific prior written permission.
138 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
139 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
140 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
141 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
142 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
143 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
144 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
145 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
146 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
147 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
156 +.macro preload_leading_step1 backwards, ptr, base
157 +/* If the destination is already 16-byte aligned, then we need to preload
158 + * between 0 and prefetch_distance (inclusive) cache lines ahead so there
159 + * are no gaps when the inner loop starts.
168 + .rept prefetch_distance+1
171 + .set OFFSET, OFFSET-32
173 + .set OFFSET, OFFSET+32
178 +.macro preload_leading_step2 backwards, ptr, base, leading_bytes, tmp
179 +/* However, if the destination is not 16-byte aligned, we may need to
180 + * preload one more cache line than that. The question we need to ask is:
181 + * are the leading bytes more than the amount by which the source
182 + * pointer will be rounded down for preloading, and if so, by how many
186 +/* Here we compare against how many bytes we are into the
187 + * cache line, counting down from the highest such address.
188 + * Effectively, we want to calculate
189 + * leading_bytes = dst&15
190 + * cacheline_offset = 31-((src-leading_bytes-1)&31)
191 + * extra_needed = leading_bytes - cacheline_offset
192 + * and test if extra_needed is <= 0, or rearranging:
193 + * leading_bytes + (src-leading_bytes-1)&31 <= 31
195 + mov tmp, base, lsl #32-5
196 + sbc tmp, tmp, leading_bytes, lsl #32-5
197 + adds tmp, tmp, leading_bytes, lsl #32-5
199 + pld [ptr, #-32*(prefetch_distance+1)]
201 +/* Effectively, we want to calculate
202 + * leading_bytes = (-dst)&15
203 + * cacheline_offset = (src+leading_bytes)&31
204 + * extra_needed = leading_bytes - cacheline_offset
205 + * and test if extra_needed is <= 0.
207 + mov tmp, base, lsl #32-5
208 + add tmp, tmp, leading_bytes, lsl #32-5
209 + rsbs tmp, tmp, leading_bytes, lsl #32-5
211 + pld [ptr, #32*(prefetch_distance+1)]
216 +.macro preload_trailing backwards, base, remain, tmp
217 + /* We need either 0, 1 or 2 extra preloads */
220 + mov tmp, tmp, lsl #32-5
222 + mov tmp, base, lsl #32-5
224 + adds tmp, tmp, remain, lsl #32-5
225 + adceqs tmp, tmp, #0
226 + /* The instruction above has two effects: ensures Z is only
227 + * set if C was clear (so Z indicates that both shifted quantities
228 + * were 0), and clears C if Z was set (so C indicates that the sum
229 + * of the shifted quantities was greater and not equal to 32) */
239 + pld [tmp, #-32*(prefetch_distance+1)]
241 + pld [tmp, #-32*prefetch_distance]
243 + pld [tmp, #32*(prefetch_distance+2)]
245 + pld [tmp, #32*(prefetch_distance+1)]
250 +.macro preload_all backwards, narrow_case, shift, base, remain, tmp0, tmp1
253 + bic tmp0, tmp0, #31
255 + sub tmp1, base, remain, lsl #shift
257 + bic tmp0, base, #31
259 + add tmp1, base, remain, lsl #shift
262 + bic tmp1, tmp1, #31
266 + /* In this case, all the data fits in either 1 or 2 cache lines */
271 + sub tmp0, tmp0, #32
273 + add tmp0, tmp0, #32
281 --- a/arch/arm/lib/copy_from_user.S
282 +++ b/arch/arm/lib/copy_from_user.S
287 -ENTRY(arm_copy_from_user)
288 +ENTRY(__copy_from_user_std)
289 +WEAK(arm_copy_from_user)
290 #ifdef CONFIG_CPU_SPECTRE
292 ldr r3, [r3, #TI_ADDR_LIMIT]
293 @@ -117,6 +118,7 @@ ENTRY(arm_copy_from_user)
294 #include "copy_template.S"
296 ENDPROC(arm_copy_from_user)
297 +ENDPROC(__copy_from_user_std)
299 .pushsection .text.fixup,"ax"
302 +++ b/arch/arm/lib/exports_rpi.c
305 + * Copyright (c) 2014, Raspberry Pi (Trading) Ltd.
307 + * Redistribution and use in source and binary forms, with or without
308 + * modification, are permitted provided that the following conditions
310 + * 1. Redistributions of source code must retain the above copyright
311 + * notice, this list of conditions, and the following disclaimer,
312 + * without modification.
313 + * 2. Redistributions in binary form must reproduce the above copyright
314 + * notice, this list of conditions and the following disclaimer in the
315 + * documentation and/or other materials provided with the distribution.
316 + * 3. The names of the above-listed copyright holders may not be used
317 + * to endorse or promote products derived from this software without
318 + * specific prior written permission.
320 + * ALTERNATIVELY, this software may be distributed under the terms of the
321 + * GNU General Public License ("GPL") version 2, as published by the Free
322 + * Software Foundation.
324 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
325 + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
326 + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
327 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
328 + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
329 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
330 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
331 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
332 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
333 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
334 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
337 +#include <linux/kernel.h>
338 +#include <linux/module.h>
340 +EXPORT_SYMBOL(memcmp);
342 +++ b/arch/arm/lib/memcmp_rpi.S
345 +Copyright (c) 2013, Raspberry Pi Foundation
346 +Copyright (c) 2013, RISC OS Open Ltd
347 +All rights reserved.
349 +Redistribution and use in source and binary forms, with or without
350 +modification, are permitted provided that the following conditions are met:
351 + * Redistributions of source code must retain the above copyright
352 + notice, this list of conditions and the following disclaimer.
353 + * Redistributions in binary form must reproduce the above copyright
354 + notice, this list of conditions and the following disclaimer in the
355 + documentation and/or other materials provided with the distribution.
356 + * Neither the name of the copyright holder nor the
357 + names of its contributors may be used to endorse or promote products
358 + derived from this software without specific prior written permission.
360 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
361 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
362 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
363 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
364 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
365 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
366 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
367 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
368 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
369 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
372 +#include <linux/linkage.h>
373 +#include "arm-mem.h"
375 +/* Prevent the stack from becoming executable */
376 +#if defined(__linux__) && defined(__ELF__)
377 +.section .note.GNU-stack,"",%progbits
387 +.macro memcmp_process_head unaligned
389 + ldr DAT0, [S_1], #4
390 + ldr DAT1, [S_1], #4
391 + ldr DAT2, [S_1], #4
392 + ldr DAT3, [S_1], #4
394 + ldmia S_1!, {DAT0, DAT1, DAT2, DAT3}
396 + ldmia S_2!, {DAT4, DAT5, DAT6, DAT7}
399 +.macro memcmp_process_tail
407 +.macro memcmp_leading_31bytes
408 + movs DAT0, OFF, lsl #31
409 + ldrmib DAT0, [S_1], #1
410 + ldrcsh DAT1, [S_1], #2
411 + ldrmib DAT4, [S_2], #1
412 + ldrcsh DAT5, [S_2], #2
422 + movs DAT0, OFF, lsl #29
423 + ldrmi DAT0, [S_1], #4
424 + ldrcs DAT1, [S_1], #4
425 + ldrcs DAT2, [S_1], #4
426 + ldrmi DAT4, [S_2], #4
427 + ldmcsia S_2!, {DAT5, DAT6}
442 + memcmp_process_head 1
444 + memcmp_process_tail
448 +.macro memcmp_trailing_15bytes unaligned
451 + ldrcs DAT0, [S_1], #4
452 + ldrcs DAT1, [S_1], #4
454 + ldmcsia S_1!, {DAT0, DAT1}
456 + ldrmi DAT2, [S_1], #4
457 + ldmcsia S_2!, {DAT4, DAT5}
458 + ldrmi DAT6, [S_2], #4
470 + ldrcsh DAT0, [S_1], #2
472 + ldrcsh DAT4, [S_2], #2
483 +.macro memcmp_long_inner_loop unaligned
485 + memcmp_process_head unaligned
486 + pld [S_2, #prefetch_distance*32 + 16]
487 + memcmp_process_tail
488 + memcmp_process_head unaligned
490 + memcmp_process_tail
493 + /* Just before the final (prefetch_distance+1) 32-byte blocks,
494 + * deal with final preloads */
495 + preload_trailing 0, S_1, N, DAT0
496 + preload_trailing 0, S_2, N, DAT0
497 + add N, N, #(prefetch_distance+2)*32 - 16
499 + memcmp_process_head unaligned
500 + memcmp_process_tail
503 + /* Trailing words and bytes */
506 + memcmp_trailing_15bytes unaligned
507 +199: /* Reached end without detecting a difference */
510 + pop {DAT1-DAT6, pc}
513 +.macro memcmp_short_inner_loop unaligned
514 + subs N, N, #16 /* simplifies inner loop termination */
517 + memcmp_process_head unaligned
518 + memcmp_process_tail
521 +122: /* Trailing words and bytes */
524 + memcmp_trailing_15bytes unaligned
525 +199: /* Reached end without detecting a difference */
528 + pop {DAT1-DAT6, pc}
532 + * int memcmp(const void *s1, const void *s2, size_t n);
534 + * a1 = pointer to buffer 1
535 + * a2 = pointer to buffer 2
536 + * a3 = number of bytes to compare (as unsigned chars)
538 + * a1 = >0/=0/<0 if s1 >/=/< s2
541 +.set prefetch_distance, 2
557 + push {DAT1-DAT6, lr}
558 + setend be /* lowest-addressed bytes are most significant */
560 + /* To preload ahead as we go, we need at least (prefetch_distance+2) 32-byte blocks */
561 + cmp N, #(prefetch_distance+3)*32 - 1
565 + /* Adjust N so that the decrement instruction can also test for
566 + * inner loop termination. We want it to stop when there are
567 + * (prefetch_distance+1) complete blocks to go. */
568 + sub N, N, #(prefetch_distance+2)*32
569 + preload_leading_step1 0, DAT0, S_1
570 + preload_leading_step1 0, DAT1, S_2
573 + rsb OFF, S_2, #0 /* no need to AND with 15 here */
574 + preload_leading_step2 0, DAT0, S_1, OFF, DAT2
575 + preload_leading_step2 0, DAT1, S_2, OFF, DAT2
576 + memcmp_leading_31bytes
577 +154: /* Second source now cacheline (32-byte) aligned; we have at
578 + * least one prefetch to go. */
579 + /* Prefetch offset is best selected such that it lies in the
580 + * first 8 of each 32 bytes - but it's just as easy to aim for
583 + rsb OFF, OFF, #32*prefetch_distance
586 + memcmp_long_inner_loop 0
587 +140: memcmp_long_inner_loop 1
589 +170: /* Short case */
592 + preload_all 0, 0, 0, S_1, N, DAT0, DAT1
593 + preload_all 0, 0, 0, S_2, N, DAT0, DAT1
598 + ldrb DAT0, [S_1], #1
599 + ldrb DAT4, [S_2], #1
604 +174: /* Second source now 4-byte aligned; we have 0 or more bytes to go */
607 + memcmp_short_inner_loop 0
608 +140: memcmp_short_inner_loop 1
610 +200: /* Difference found: determine sign. */
614 + pop {DAT1-DAT6, pc}
630 +++ b/arch/arm/lib/memcpy_rpi.S
633 +Copyright (c) 2013, Raspberry Pi Foundation
634 +Copyright (c) 2013, RISC OS Open Ltd
635 +All rights reserved.
637 +Redistribution and use in source and binary forms, with or without
638 +modification, are permitted provided that the following conditions are met:
639 + * Redistributions of source code must retain the above copyright
640 + notice, this list of conditions and the following disclaimer.
641 + * Redistributions in binary form must reproduce the above copyright
642 + notice, this list of conditions and the following disclaimer in the
643 + documentation and/or other materials provided with the distribution.
644 + * Neither the name of the copyright holder nor the
645 + names of its contributors may be used to endorse or promote products
646 + derived from this software without specific prior written permission.
648 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
649 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
650 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
651 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
652 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
653 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
654 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
655 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
656 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
657 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
660 +#include <linux/linkage.h>
661 +#include "arm-mem.h"
662 +#include "memcpymove.h"
664 +/* Prevent the stack from becoming executable */
665 +#if defined(__linux__) && defined(__ELF__)
666 +.section .note.GNU-stack,"",%progbits
677 + * void *memcpy(void * restrict s1, const void * restrict s2, size_t n);
679 + * a1 = pointer to destination
680 + * a2 = pointer to source
681 + * a3 = number of bytes to copy
686 +.set prefetch_distance, 3
694 +++ b/arch/arm/lib/memcpymove.h
697 +Copyright (c) 2013, Raspberry Pi Foundation
698 +Copyright (c) 2013, RISC OS Open Ltd
699 +All rights reserved.
701 +Redistribution and use in source and binary forms, with or without
702 +modification, are permitted provided that the following conditions are met:
703 + * Redistributions of source code must retain the above copyright
704 + notice, this list of conditions and the following disclaimer.
705 + * Redistributions in binary form must reproduce the above copyright
706 + notice, this list of conditions and the following disclaimer in the
707 + documentation and/or other materials provided with the distribution.
708 + * Neither the name of the copyright holder nor the
709 + names of its contributors may be used to endorse or promote products
710 + derived from this software without specific prior written permission.
712 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
713 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
714 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
715 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
716 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
717 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
718 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
719 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
720 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
721 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
724 +.macro unaligned_words backwards, align, use_pld, words, r0, r1, r2, r3, r4, r5, r6, r7, r8
727 + mov r1, r0, lsl #32-align*8
729 + orr r1, r1, r0, lsr #align*8
732 + mov r0, r1, lsr #align*8
734 + orr r0, r0, r1, lsl #32-align*8
740 + mov r2, r0, lsl #32-align*8
742 + orr r2, r2, r1, lsr #align*8
743 + mov r1, r1, lsl #32-align*8
744 + orr r1, r1, r0, lsr #align*8
748 + mov r0, r2, lsr #align*8
750 + orr r0, r0, r1, lsl #32-align*8
751 + mov r1, r1, lsr #align*8
752 + orr r1, r1, r2, lsl #32-align*8
758 + mov r4, r0, lsl #32-align*8
760 + orr r4, r4, r3, lsr #align*8
761 + mov r3, r3, lsl #32-align*8
762 + orr r3, r3, r2, lsr #align*8
763 + mov r2, r2, lsl #32-align*8
764 + orr r2, r2, r1, lsr #align*8
765 + mov r1, r1, lsl #32-align*8
766 + orr r1, r1, r0, lsr #align*8
767 + stmdb D!, {r1, r2, r3, r4}
770 + mov r0, r4, lsr #align*8
772 + orr r0, r0, r1, lsl #32-align*8
773 + mov r1, r1, lsr #align*8
774 + orr r1, r1, r2, lsl #32-align*8
775 + mov r2, r2, lsr #align*8
776 + orr r2, r2, r3, lsl #32-align*8
777 + mov r3, r3, lsr #align*8
778 + orr r3, r3, r4, lsl #32-align*8
779 + stmia D!, {r0, r1, r2, r3}
783 + ldmdb S!, {r4, r5, r6, r7}
784 + mov r8, r0, lsl #32-align*8
785 + ldmdb S!, {r0, r1, r2, r3}
789 + orr r8, r8, r7, lsr #align*8
790 + mov r7, r7, lsl #32-align*8
791 + orr r7, r7, r6, lsr #align*8
792 + mov r6, r6, lsl #32-align*8
793 + orr r6, r6, r5, lsr #align*8
794 + mov r5, r5, lsl #32-align*8
795 + orr r5, r5, r4, lsr #align*8
796 + mov r4, r4, lsl #32-align*8
797 + orr r4, r4, r3, lsr #align*8
798 + mov r3, r3, lsl #32-align*8
799 + orr r3, r3, r2, lsr #align*8
800 + mov r2, r2, lsl #32-align*8
801 + orr r2, r2, r1, lsr #align*8
802 + mov r1, r1, lsl #32-align*8
803 + orr r1, r1, r0, lsr #align*8
804 + stmdb D!, {r5, r6, r7, r8}
805 + stmdb D!, {r1, r2, r3, r4}
807 + ldmib S!, {r1, r2, r3, r4}
808 + mov r0, r8, lsr #align*8
809 + ldmib S!, {r5, r6, r7, r8}
813 + orr r0, r0, r1, lsl #32-align*8
814 + mov r1, r1, lsr #align*8
815 + orr r1, r1, r2, lsl #32-align*8
816 + mov r2, r2, lsr #align*8
817 + orr r2, r2, r3, lsl #32-align*8
818 + mov r3, r3, lsr #align*8
819 + orr r3, r3, r4, lsl #32-align*8
820 + mov r4, r4, lsr #align*8
821 + orr r4, r4, r5, lsl #32-align*8
822 + mov r5, r5, lsr #align*8
823 + orr r5, r5, r6, lsl #32-align*8
824 + mov r6, r6, lsr #align*8
825 + orr r6, r6, r7, lsl #32-align*8
826 + mov r7, r7, lsr #align*8
827 + orr r7, r7, r8, lsl #32-align*8
828 + stmia D!, {r0, r1, r2, r3}
829 + stmia D!, {r4, r5, r6, r7}
834 +.macro memcpy_leading_15bytes backwards, align
835 + movs DAT1, DAT2, lsl #31
838 + ldrmib DAT0, [S, #-1]!
839 + ldrcsh DAT1, [S, #-2]!
840 + strmib DAT0, [D, #-1]!
841 + strcsh DAT1, [D, #-2]!
843 + ldrmib DAT0, [S], #1
844 + ldrcsh DAT1, [S], #2
845 + strmib DAT0, [D], #1
846 + strcsh DAT1, [D], #2
848 + movs DAT1, DAT2, lsl #29
850 + ldrmi DAT0, [S, #-4]!
852 + ldmcsdb S!, {DAT1, DAT2}
854 + ldrcs DAT2, [S, #-4]!
855 + ldrcs DAT1, [S, #-4]!
857 + strmi DAT0, [D, #-4]!
858 + stmcsdb D!, {DAT1, DAT2}
860 + ldrmi DAT0, [S], #4
862 + ldmcsia S!, {DAT1, DAT2}
864 + ldrcs DAT1, [S], #4
865 + ldrcs DAT2, [S], #4
867 + strmi DAT0, [D], #4
868 + stmcsia D!, {DAT1, DAT2}
872 +.macro memcpy_trailing_15bytes backwards, align
876 + ldmcsdb S!, {DAT0, DAT1}
878 + ldrcs DAT1, [S, #-4]!
879 + ldrcs DAT0, [S, #-4]!
881 + ldrmi DAT2, [S, #-4]!
882 + stmcsdb D!, {DAT0, DAT1}
883 + strmi DAT2, [D, #-4]!
886 + ldmcsia S!, {DAT0, DAT1}
888 + ldrcs DAT0, [S], #4
889 + ldrcs DAT1, [S], #4
891 + ldrmi DAT2, [S], #4
892 + stmcsia D!, {DAT0, DAT1}
893 + strmi DAT2, [D], #4
897 + ldrcsh DAT0, [S, #-2]!
898 + ldrmib DAT1, [S, #-1]
899 + strcsh DAT0, [D, #-2]!
900 + strmib DAT1, [D, #-1]
902 + ldrcsh DAT0, [S], #2
904 + strcsh DAT0, [D], #2
909 +.macro memcpy_long_inner_loop backwards, align
912 + ldr DAT0, [S, #-align]!
914 + ldr LAST, [S, #-align]!
920 + ldmdb S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
922 + stmdb D!, {DAT4, DAT5, DAT6, LAST}
923 + stmdb D!, {DAT0, DAT1, DAT2, DAT3}
925 + ldmia S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
927 + stmia D!, {DAT0, DAT1, DAT2, DAT3}
928 + stmia D!, {DAT4, DAT5, DAT6, LAST}
931 + unaligned_words backwards, align, 1, 8, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7, LAST
935 + /* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */
936 + preload_trailing backwards, S, N, OFF
937 + add N, N, #(prefetch_distance+2)*32 - 32
941 + ldmdb S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
942 + stmdb D!, {DAT4, DAT5, DAT6, LAST}
943 + stmdb D!, {DAT0, DAT1, DAT2, DAT3}
945 + ldmia S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
946 + stmia D!, {DAT0, DAT1, DAT2, DAT3}
947 + stmia D!, {DAT4, DAT5, DAT6, LAST}
950 + unaligned_words backwards, align, 0, 8, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7, LAST
957 + ldmnedb S!, {DAT0, DAT1, DAT2, LAST}
958 + stmnedb D!, {DAT0, DAT1, DAT2, LAST}
960 + ldmneia S!, {DAT0, DAT1, DAT2, LAST}
961 + stmneia D!, {DAT0, DAT1, DAT2, LAST}
965 + unaligned_words backwards, align, 0, 4, DAT0, DAT1, DAT2, DAT3, LAST
968 + /* Trailing words and bytes */
974 + memcpy_trailing_15bytes backwards, align
976 + pop {DAT3, DAT4, DAT5, DAT6, DAT7}
977 + pop {D, DAT1, DAT2, pc}
980 +.macro memcpy_medium_inner_loop backwards, align
984 + ldmdb S!, {DAT0, DAT1, DAT2, LAST}
986 + ldr LAST, [S, #-4]!
987 + ldr DAT2, [S, #-4]!
988 + ldr DAT1, [S, #-4]!
989 + ldr DAT0, [S, #-4]!
991 + stmdb D!, {DAT0, DAT1, DAT2, LAST}
994 + ldmia S!, {DAT0, DAT1, DAT2, LAST}
1001 + stmia D!, {DAT0, DAT1, DAT2, LAST}
1005 + /* Trailing words and bytes */
1008 + memcpy_trailing_15bytes backwards, align
1010 + pop {D, DAT1, DAT2, pc}
1013 +.macro memcpy_short_inner_loop backwards, align
1017 + ldmnedb S!, {DAT0, DAT1, DAT2, LAST}
1019 + ldrne LAST, [S, #-4]!
1020 + ldrne DAT2, [S, #-4]!
1021 + ldrne DAT1, [S, #-4]!
1022 + ldrne DAT0, [S, #-4]!
1024 + stmnedb D!, {DAT0, DAT1, DAT2, LAST}
1027 + ldmneia S!, {DAT0, DAT1, DAT2, LAST}
1029 + ldrne DAT0, [S], #4
1030 + ldrne DAT1, [S], #4
1031 + ldrne DAT2, [S], #4
1032 + ldrne LAST, [S], #4
1034 + stmneia D!, {DAT0, DAT1, DAT2, LAST}
1036 + memcpy_trailing_15bytes backwards, align
1038 + pop {D, DAT1, DAT2, pc}
1041 +.macro memcpy backwards
1058 + push {D, DAT1, DAT2, lr}
1060 + .cfi_def_cfa_offset 16
1061 + .cfi_rel_offset D, 0
1064 + .cfi_undefined DAT0
1065 + .cfi_rel_offset DAT1, 4
1066 + .cfi_rel_offset DAT2, 8
1067 + .cfi_undefined LAST
1068 + .cfi_rel_offset lr, 12
1075 + /* See if we're guaranteed to have at least one 16-byte aligned 16-byte write */
1078 + /* To preload ahead as we go, we need at least (prefetch_distance+2) 32-byte blocks */
1079 + cmp N, #(prefetch_distance+3)*32 - 1
1083 + push {DAT3, DAT4, DAT5, DAT6, DAT7}
1085 + .cfi_def_cfa_offset 36
1086 + .cfi_rel_offset D, 20
1087 + .cfi_rel_offset DAT1, 24
1088 + .cfi_rel_offset DAT2, 28
1089 + .cfi_rel_offset DAT3, 0
1090 + .cfi_rel_offset DAT4, 4
1091 + .cfi_rel_offset DAT5, 8
1092 + .cfi_rel_offset DAT6, 12
1093 + .cfi_rel_offset DAT7, 16
1094 + .cfi_rel_offset lr, 32
1096 + /* Adjust N so that the decrement instruction can also test for
1097 + * inner loop termination. We want it to stop when there are
1098 + * (prefetch_distance+1) complete blocks to go. */
1099 + sub N, N, #(prefetch_distance+2)*32
1100 + preload_leading_step1 backwards, DAT0, S
1102 + /* Bug in GAS: it accepts, but mis-assembles the instruction
1103 + * ands DAT2, D, #60, 2
1104 + * which sets DAT2 to the number of leading bytes until destination is aligned and also clears C (sets borrow)
1111 + rsb DAT2, DAT2, #16 /* number of leading bytes until destination aligned */
1113 + preload_leading_step2 backwards, DAT0, S, DAT2, OFF
1114 + memcpy_leading_15bytes backwards, 1
1115 +154: /* Destination now 16-byte aligned; we have at least one prefetch as well as at least one 16-byte output block */
1116 + /* Prefetch offset is best selected such that it lies in the first 8 of each 32 bytes - but it's just as easy to aim for the first one */
1120 + sub OFF, OFF, #32*(prefetch_distance+1)
1123 + rsb OFF, OFF, #32*prefetch_distance
1125 + movs DAT0, S, lsl #31
1129 + memcpy_long_inner_loop backwards, 0
1130 +155: memcpy_long_inner_loop backwards, 1
1131 +156: memcpy_long_inner_loop backwards, 2
1132 +157: memcpy_long_inner_loop backwards, 3
1134 + .cfi_def_cfa_offset 16
1135 + .cfi_rel_offset D, 0
1136 + .cfi_rel_offset DAT1, 4
1137 + .cfi_rel_offset DAT2, 8
1138 + .cfi_same_value DAT3
1139 + .cfi_same_value DAT4
1140 + .cfi_same_value DAT5
1141 + .cfi_same_value DAT6
1142 + .cfi_same_value DAT7
1143 + .cfi_rel_offset lr, 12
1145 +160: /* Medium case */
1146 + preload_all backwards, 0, 0, S, N, DAT2, OFF
1147 + sub N, N, #16 /* simplifies inner loop termination */
1154 + rsb DAT2, DAT2, #16
1156 + memcpy_leading_15bytes backwards, align
1157 +164: /* Destination now 16-byte aligned; we have at least one 16-byte output block */
1160 + memcpy_medium_inner_loop backwards, 0
1161 +140: memcpy_medium_inner_loop backwards, 1
1163 +170: /* Short case, less than 31 bytes, so no guarantee of at least one 16-byte block */
1166 + preload_all backwards, 1, 0, S, N, DAT2, LAST
1172 + ldrb DAT0, [S, #-1]!
1173 + strb DAT0, [D, #-1]!
1175 + ldrb DAT0, [S], #1
1176 + strb DAT0, [D], #1
1180 +174: /* Destination now 4-byte aligned; we have 0 or more output bytes to go */
1183 + memcpy_short_inner_loop backwards, 0
1184 +140: memcpy_short_inner_loop backwards, 1
1203 +++ b/arch/arm/lib/memmove_rpi.S
1206 +Copyright (c) 2013, Raspberry Pi Foundation
1207 +Copyright (c) 2013, RISC OS Open Ltd
1208 +All rights reserved.
1210 +Redistribution and use in source and binary forms, with or without
1211 +modification, are permitted provided that the following conditions are met:
1212 + * Redistributions of source code must retain the above copyright
1213 + notice, this list of conditions and the following disclaimer.
1214 + * Redistributions in binary form must reproduce the above copyright
1215 + notice, this list of conditions and the following disclaimer in the
1216 + documentation and/or other materials provided with the distribution.
1217 + * Neither the name of the copyright holder nor the
1218 + names of its contributors may be used to endorse or promote products
1219 + derived from this software without specific prior written permission.
1221 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
1222 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1223 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1224 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
1225 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1226 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
1227 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
1228 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1229 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
1230 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1233 +#include <linux/linkage.h>
1234 +#include "arm-mem.h"
1235 +#include "memcpymove.h"
1237 +/* Prevent the stack from becoming executable */
1238 +#if defined(__linux__) && defined(__ELF__)
1239 +.section .note.GNU-stack,"",%progbits
1244 + .object_arch armv4
1250 + * void *memmove(void *s1, const void *s2, size_t n);
1252 + * a1 = pointer to destination
1253 + * a2 = pointer to source
1254 + * a3 = number of bytes to copy
1259 +.set prefetch_distance, 3
1263 + bpl memcpy /* pl works even over -1 - 0 and 0x7fffffff - 0x80000000 boundaries */
1267 +++ b/arch/arm/lib/memset_rpi.S
1270 +Copyright (c) 2013, Raspberry Pi Foundation
1271 +Copyright (c) 2013, RISC OS Open Ltd
1272 +All rights reserved.
1274 +Redistribution and use in source and binary forms, with or without
1275 +modification, are permitted provided that the following conditions are met:
1276 + * Redistributions of source code must retain the above copyright
1277 + notice, this list of conditions and the following disclaimer.
1278 + * Redistributions in binary form must reproduce the above copyright
1279 + notice, this list of conditions and the following disclaimer in the
1280 + documentation and/or other materials provided with the distribution.
1281 + * Neither the name of the copyright holder nor the
1282 + names of its contributors may be used to endorse or promote products
1283 + derived from this software without specific prior written permission.
1285 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
1286 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1287 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1288 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
1289 +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1290 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
1291 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
1292 +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1293 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
1294 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1297 +#include <linux/linkage.h>
1298 +#include "arm-mem.h"
1300 +/* Prevent the stack from becoming executable */
1301 +#if defined(__linux__) && defined(__ELF__)
1302 +.section .note.GNU-stack,"",%progbits
1307 + .object_arch armv4
1313 + * void *memset(void *s, int c, size_t n);
1315 + * a1 = pointer to buffer to fill
1316 + * a2 = byte pattern to fill with (caller-narrowed)
1317 + * a3 = number of bytes to fill
1333 + orr DAT0, DAT0, DAT0, lsl #8
1335 + orr DAT0, DAT0, DAT0, lsl #16
1338 + /* See if we're guaranteed to have at least one 16-byte aligned 16-byte write */
1342 +161: sub N, N, #16 /* simplifies inner loop termination */
1343 + /* Leading words and bytes */
1346 + rsb DAT3, S, #0 /* bits 0-3 = number of leading bytes until aligned */
1347 + movs DAT2, DAT3, lsl #31
1349 + strmib DAT0, [S], #1
1351 + strcsh DAT0, [S], #2
1352 + movs DAT2, DAT3, lsl #29
1354 + strmi DAT0, [S], #4
1356 + stmcsia S!, {DAT0, DAT1}
1357 +164: /* Delayed set up of DAT2 and DAT3 so we could use them as scratch registers above */
1360 + /* Now the inner loop of 16-byte stores */
1361 +165: stmia S!, {DAT0, DAT1, DAT2, DAT3}
1364 +166: /* Trailing words and bytes */
1365 + movs N, N, lsl #29
1366 + stmcsia S!, {DAT0, DAT1}
1367 + strmi DAT0, [S], #4
1369 + strcsh DAT0, [S], #2
1373 +170: /* Short case */
1380 + strb DAT0, [S], #1
1384 + stmneia S!, {DAT0, DAT1, DAT2, DAT3}
1393 +ENDPROC(__memset64)
1394 +ENDPROC(__memset32)
1397 --- a/arch/arm/lib/uaccess_with_memcpy.c
1398 +++ b/arch/arm/lib/uaccess_with_memcpy.c
1400 #include <asm/current.h>
1401 #include <asm/page.h>
1403 +#ifndef COPY_FROM_USER_THRESHOLD
1404 +#define COPY_FROM_USER_THRESHOLD 64
1407 +#ifndef COPY_TO_USER_THRESHOLD
1408 +#define COPY_TO_USER_THRESHOLD 64
1412 pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
1414 @@ -43,7 +51,7 @@ pin_page_for_write(const void __user *_a
1417 pmd = pmd_offset(pud, addr);
1418 - if (unlikely(pmd_none(*pmd)))
1419 + if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
1423 @@ -86,7 +94,46 @@ pin_page_for_write(const void __user *_a
1427 -static unsigned long noinline
1429 +pin_page_for_read(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
1431 + unsigned long addr = (unsigned long)_addr; /* user address whose page-table entry is looked up below */
1439 + pgd = pgd_offset(current->mm, addr); /* walk current->mm top-down: pgd -> p4d -> pud -> pmd -> pte */
1440 + if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
1443 + p4d = p4d_offset(pgd, addr);
1444 + if (unlikely(p4d_none(*p4d) || p4d_bad(*p4d)))
1447 + pud = pud_offset(p4d, addr);
1448 + if (unlikely(pud_none(*pud) || pud_bad(*pud)))
1451 + pmd = pmd_offset(pud, addr);
1452 + if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
1455 + pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl); /* map the PTE and take its lock; NOTE(review): confirm this cannot return NULL on the target kernel before *pte is read */
1456 + if (unlikely(!pte_present(*pte) || !pte_young(*pte))) { /* page absent, or present but not marked young */
1457 + pte_unmap_unlock(pte, ptl); /* release the mapping and lock taken just above */
1467 +unsigned long noinline
1468 __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
1470 unsigned long ua_flags;
1471 @@ -139,6 +186,57 @@ out:
1475 +unsigned long noinline
1476 +__copy_from_user_memcpy(void *to, const void __user *from, unsigned long n)
1478 + unsigned long ua_flags;
1481 + if (unlikely(uaccess_kernel())) { /* uaccess_kernel() case: no pinning, copy all n bytes directly */
1482 + memcpy(to, (const void *)from, n);
1486 + /* the mmap read lock is taken only if not in an atomic context */
1487 + atomic = in_atomic();
1490 + mmap_read_lock(current->mm);
1496 + while (!pin_page_for_read(from, &pte, &ptl)) { /* retry until the page holding 'from' is pinned */
1499 + mmap_read_unlock(current->mm); /* drop the lock around the user access below */
1500 + if (__get_user(temp, (char __user *)from)) /* touch one source byte through the normal uaccess path */
1503 + mmap_read_lock(current->mm);
1506 + tocopy = (~(unsigned long)from & ~PAGE_MASK) + 1; /* bytes from 'from' up to the end of its page */
1510 + ua_flags = uaccess_save_and_enable(); /* user access is enabled only around this memcpy */
1511 + memcpy(to, (const void *)from, tocopy);
1512 + uaccess_restore(ua_flags);
1517 + pte_unmap_unlock(pte, ptl); /* release the PTE lock obtained by pin_page_for_read() */
1520 + mmap_read_unlock(current->mm);
1527 arm_copy_to_user(void __user *to, const void *from, unsigned long n)
1529 @@ -149,7 +247,7 @@ arm_copy_to_user(void __user *to, const
1530 * With frame pointer disabled, tail call optimization kicks in
1531 * as well making this test almost invisible.
1534 + if (n < COPY_TO_USER_THRESHOLD) {
1535 unsigned long ua_flags = uaccess_save_and_enable();
1536 n = __copy_to_user_std(to, from, n);
1537 uaccess_restore(ua_flags);
1538 @@ -159,6 +257,32 @@ arm_copy_to_user(void __user *to, const
1543 +unsigned long __must_check
1544 +arm_copy_from_user(void *to, const void __user *from, unsigned long n)
1546 +#ifdef CONFIG_BCM2835_FAST_MEMCPY
1548 + * This test is stubbed out of the main function above to keep
1549 + * the overhead for small copies low by avoiding a large
1550 + * register dump on the stack just to reload them right away.
1551 + * With frame pointer disabled, tail call optimization kicks in
1552 + * as well making this test almost invisible.
1554 + if (n < COPY_FROM_USER_THRESHOLD) { /* short copy: standard routine; threshold is this direction's own tunable */
1555 + unsigned long ua_flags = uaccess_save_and_enable();
1556 + n = __copy_from_user_std(to, from, n);
1557 + uaccess_restore(ua_flags);
1559 + n = __copy_from_user_memcpy(to, from, n); /* otherwise: page-pinning memcpy path */
1562 + unsigned long ua_flags = uaccess_save_and_enable();
1563 + n = __copy_from_user_std(to, from, n);
1564 + uaccess_restore(ua_flags);
1569 static unsigned long noinline
1570 __clear_user_memset(void __user *addr, unsigned long n)
1571 --- a/arch/arm/mach-bcm/Kconfig
1572 +++ b/arch/arm/mach-bcm/Kconfig
1573 @@ -184,6 +184,13 @@ config ARCH_BCM_53573
1574 The base chip is BCM53573 and there are some packaging modifications
1575 like BCM47189 and BCM47452.
1577 +config BCM2835_FAST_MEMCPY
1578 + bool "Enable optimized __copy_to_user and __copy_from_user"
1579 + depends on ARCH_BCM2835 && ARCH_MULTI_V6
1582 + Optimized versions of __copy_to_user and __copy_from_user for Pi1.
1584 config ARCH_BCM_63XX
1585 bool "Broadcom BCM63xx DSL SoC"
1586 depends on ARCH_MULTI_V7