--- /dev/null
+From 088d41f6c7793404875f160730a198cd4a0c1a48 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Thu, 22 Sep 2016 14:02:45 +0200
+Subject: [PATCH 01/32] sparc32,leon: Build with -mcpu=leon3 for SPARC_LEON
+
+Apart from using LEON3 instruction timing it allows for usage of the
+CASA instruction.
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/Makefile | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
+index 4a0919581697..65cd5c7dc160 100644
+--- a/arch/sparc/Makefile
++++ b/arch/sparc/Makefile
+@@ -25,14 +25,20 @@ KBUILD_LDFLAGS := -m elf32_sparc
+ export BITS := 32
+ UTS_MACHINE := sparc
+
++ifeq ($(CONFIG_SPARC_LEON),y)
++SPARC_MCPU=leon3
++else
++SPARC_MCPU=v8
++endif
++
+ # We are adding -Wa,-Av8 to KBUILD_CFLAGS to deal with a specs bug in some
+ # versions of gcc. Some gcc versions won't pass -Av8 to binutils when you
+ # give -mcpu=v8. This silently worked with older bintutils versions but
+ # does not any more.
+-KBUILD_CFLAGS += -m32 -mcpu=v8 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
++KBUILD_CFLAGS += -m32 -mcpu=$(SPARC_MCPU) -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
+ KBUILD_CFLAGS += -Wa,-Av8
+
+-KBUILD_AFLAGS += -m32 -Wa,-Av8
++KBUILD_AFLAGS += -m32 -mcpu=$(SPARC_MCPU) -Wa,-Av8
+
+ else
+ #####
+--
+2.34.1
+
--- /dev/null
+From 4500a8233c7e7aa051a3175839e3d24f87e8e1cd Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Thu, 6 Jul 2017 11:32:29 +0200
+Subject: [PATCH 02/32] sparc32,leon: Add support for atomic operations with
+ CAS
+
+Adds support for doing atomic operations using CAS on LEON instead of
+using the spinlock based emulated atomics. It is configurable for LEON
+as not all LEON hardware supports the CASA instruction.
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/Kconfig | 17 +++-
+ arch/sparc/include/asm/atomic_32.h | 42 ++++++++-
+ arch/sparc/include/asm/bitops_32.h | 13 +++
+ arch/sparc/include/asm/cmpxchg_32.h | 41 +++++++++
+ arch/sparc/lib/Makefile | 4 +-
+ arch/sparc/lib/atomic_cas_32.S | 73 ++++++++++++++++
+ arch/sparc/lib/bitops_cas_32.S | 130 ++++++++++++++++++++++++++++
+ 7 files changed, 315 insertions(+), 5 deletions(-)
+ create mode 100644 arch/sparc/lib/atomic_cas_32.S
+ create mode 100644 arch/sparc/lib/bitops_cas_32.S
+
+diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
+index b5ed89342059..178496eff3de 100644
+--- a/arch/sparc/Kconfig
++++ b/arch/sparc/Kconfig
+@@ -206,10 +206,10 @@ config ARCH_MAY_HAVE_PC_FDC
+
+ config EMULATED_CMPXCHG
+ bool
+- default y if SPARC32
++ default y if SPARC32_NO_CAS
+ help
+- Sparc32 does not have a CAS instruction like sparc64. cmpxchg()
+- is emulated, and therefore it is not completely atomic.
++ Sparc32 does not in general have a CAS instruction like sparc64.
++ When cmpxchg() is emulated it is not completely atomic.
+
+ # Makefile helpers
+ config SPARC32_SMP
+@@ -372,6 +372,10 @@ config SERIAL_CONSOLE
+
+ If unsure, say N.
+
++config SPARC32_NO_CAS
++ bool
++ default y if SPARC32 && !SPARC_LEON_CAS
++
+ config SPARC_LEON
+ bool "Sparc Leon processor family"
+ depends on SPARC32
+@@ -388,6 +392,13 @@ config SPARC_LEON
+ toolchain at www.gaisler.com.
+
+ if SPARC_LEON
++config SPARC_LEON_CAS
++ bool "Use Compare and Swap"
++ default y
++ help
++ If you say Y here the kernel will use the CASA instruction. Enable
++ this only if the LEON processor has hardware support for it.
++
+ menu "U-Boot options"
+
+ config UBOOT_LOAD_ADDR
+diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
+index efad5532f169..f402e297fc25 100644
+--- a/arch/sparc/include/asm/atomic_32.h
++++ b/arch/sparc/include/asm/atomic_32.h
+@@ -18,6 +18,46 @@
+ #include <asm/barrier.h>
+ #include <asm-generic/atomic64.h>
+
++#define atomic_read(v) READ_ONCE((v)->counter)
++
++#ifdef CONFIG_SPARC_LEON_CAS
++
++#define ATOMIC_OP(op) \
++void atomic_##op(int, atomic_t *);
++
++#define ATOMIC_OP_RETURN(op) \
++int atomic_##op##_return(int, atomic_t *);
++
++#define ATOMIC_FETCH_OP(op) \
++int atomic_fetch_##op(int, atomic_t *);
++
++#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
++
++ATOMIC_OPS(add)
++ATOMIC_OPS(sub)
++
++#undef ATOMIC_OPS
++#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
++
++ATOMIC_OPS(and)
++ATOMIC_OPS(or)
++ATOMIC_OPS(xor)
++
++#undef ATOMIC_OPS
++#undef ATOMIC_FETCH_OP
++#undef ATOMIC_OP_RETURN
++#undef ATOMIC_OP
++
++#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
++#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
++
++static inline int atomic_xchg(atomic_t *v, int new)
++{
++ return xchg(&v->counter, new);
++}
++
++#else /* CONFIG_SPARC_LEON_CAS */
++
+ int atomic_add_return(int, atomic_t *);
+ int atomic_fetch_add(int, atomic_t *);
+ int atomic_fetch_and(int, atomic_t *);
+@@ -32,7 +72,7 @@ void atomic_set(atomic_t *, int);
+
+ #define atomic_set_release(v, i) atomic_set((v), (i))
+
+-#define atomic_read(v) READ_ONCE((v)->counter)
++#endif /* CONFIG_SPARC_LEON_CAS */
+
+ #define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v)))
+ #define atomic_sub(i, v) ((void)atomic_add_return(-(int)(i), (v)))
+diff --git a/arch/sparc/include/asm/bitops_32.h b/arch/sparc/include/asm/bitops_32.h
+index 0ceff3b915a8..68a9f0766b80 100644
+--- a/arch/sparc/include/asm/bitops_32.h
++++ b/arch/sparc/include/asm/bitops_32.h
+@@ -19,6 +19,17 @@
+ #error only <linux/bitops.h> can be included directly
+ #endif
+
++#ifdef CONFIG_SPARC_LEON_CAS
++
++int test_and_set_bit(unsigned long nr, volatile unsigned long *addr);
++int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr);
++int test_and_change_bit(unsigned long nr, volatile unsigned long *addr);
++void set_bit(unsigned long nr, volatile unsigned long *addr);
++void clear_bit(unsigned long nr, volatile unsigned long *addr);
++void change_bit(unsigned long nr, volatile unsigned long *addr);
++
++#else /* CONFIG_SPARC_LEON_CAS */
++
+ unsigned long ___set_bit(unsigned long *addr, unsigned long mask);
+ unsigned long ___clear_bit(unsigned long *addr, unsigned long mask);
+ unsigned long ___change_bit(unsigned long *addr, unsigned long mask);
+@@ -89,6 +100,8 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
+ (void) ___change_bit(ADDR, mask);
+ }
+
++#endif /* CONFIG_SPARC_LEON_CAS */
++
+ #include <asm-generic/bitops/non-atomic.h>
+
+ #include <asm-generic/bitops/ffz.h>
+diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h
+index c73b5a3ab7b9..906380661c29 100644
+--- a/arch/sparc/include/asm/cmpxchg_32.h
++++ b/arch/sparc/include/asm/cmpxchg_32.h
+@@ -12,14 +12,34 @@
+ #ifndef __ARCH_SPARC_CMPXCHG__
+ #define __ARCH_SPARC_CMPXCHG__
+
++#ifdef CONFIG_SPARC_LEON_CAS
++
++static inline unsigned long xchg_u32(__volatile__ unsigned long *m, unsigned long val)
++{
++ __asm__ __volatile__("swap [%2], %0"
++ : "=&r" (val)
++ : "0" (val), "r" (m)
++ : "memory");
++ return val;
++}
++
++#else /* CONFIG_SPARC_LEON_CAS */
++
+ unsigned long __xchg_u32(volatile u32 *m, u32 new);
++
++#endif /* CONFIG_SPARC_LEON_CAS */
++
+ void __xchg_called_with_bad_pointer(void);
+
+ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int size)
+ {
+ switch (size) {
+ case 4:
++#ifdef CONFIG_SPARC_LEON_CAS
++ return xchg_u32(ptr, x);
++#else
+ return __xchg_u32(ptr, x);
++#endif
+ }
+ __xchg_called_with_bad_pointer();
+ return x;
+@@ -38,8 +58,23 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int
+
+ /* bug catcher for when unsupported size is used - won't link */
+ void __cmpxchg_called_with_bad_pointer(void);
++
+ /* we only need to support cmpxchg of a u32 on sparc */
++#ifdef CONFIG_SPARC_LEON_CAS
++static inline unsigned long
++__cmpxchg_u32(volatile int *m, int old, int new)
++{
++ __asm__ __volatile__("casa [%2] 0xb, %3, %0"
++ : "=&r" (new)
++ : "0" (new), "r" (m), "r" (old)
++ : "memory");
++
++ return new;
++}
++
++#else /* CONFIG_SPARC_LEON_CAS */
+ unsigned long __cmpxchg_u32(volatile u32 *m, u32 old, u32 new_);
++#endif /* CONFIG_SPARC_LEON_CAS */
+
+ /* don't worry...optimizer will get rid of most of this */
+ static inline unsigned long
+@@ -63,8 +98,14 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size)
+ (unsigned long)_n_, sizeof(*(ptr))); \
+ })
+
++/*
++ * We can not support 64-bit cmpxchg using LEON CASA. Better fail to link than
++ * pretend we can support something that is not atomic towards 64-bit writes.
++ */
++#ifndef CONFIG_SPARC_LEON_CAS
+ u64 __cmpxchg_u64(u64 *ptr, u64 old, u64 new);
+ #define cmpxchg64(ptr, old, new) __cmpxchg_u64(ptr, old, new)
++#endif
+
+ #include <asm-generic/cmpxchg-local.h>
+
+diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
+index 063556fe2cb1..234c76cc1a6f 100644
+--- a/arch/sparc/lib/Makefile
++++ b/arch/sparc/lib/Makefile
+@@ -52,5 +52,7 @@ lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
+ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
+
+ obj-$(CONFIG_SPARC64) += iomap.o
+-obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
++obj-$(CONFIG_SPARC32) += ucmpdi2.o
++obj-$(CONFIG_SPARC32_NO_CAS) += atomic32.o
++obj-$(CONFIG_SPARC_LEON_CAS) += atomic_cas_32.o bitops_cas_32.o
+ obj-$(CONFIG_SPARC64) += PeeCeeI.o
+diff --git a/arch/sparc/lib/atomic_cas_32.S b/arch/sparc/lib/atomic_cas_32.S
+new file mode 100644
+index 000000000000..d68b53b82833
+--- /dev/null
++++ b/arch/sparc/lib/atomic_cas_32.S
+@@ -0,0 +1,73 @@
++/* atomic_cas_32.S
++ *
++ * Copyright (C) 1999, 2007 2012 David S. Miller (davem@davemloft.net)
++ *
++ * Adaption for LEON with CAS from atomic_64.S, by Andreas Larsson
++ * (andreas@gaisler.com).
++ */
++
++#include <linux/linkage.h>
++#include <asm/asi.h>
++#include <asm/export.h>
++
++ .text
++
++#define ATOMIC_OP(op) \
++ENTRY(atomic_##op) /* %o0 = increment, %o1 = atomic_ptr */ \
++1: ld [%o1], %g1; \
++ op %g1, %o0, %g7; \
++ casa [%o1] 0xb, %g1, %g7; \
++ cmp %g1, %g7; \
++ bne 1b; \
++ nop; \
++ retl; \
++ nop; \
++ENDPROC(atomic_##op); \
++EXPORT_SYMBOL(atomic_##op);
++
++#define ATOMIC_OP_RETURN(op) \
++ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \
++1: ld [%o1], %g1; \
++ op %g1, %o0, %g7; \
++ casa [%o1] 0xb, %g1, %g7; \
++ cmp %g1, %g7; \
++ bne 1b; \
++ nop; \
++ retl; \
++ op %g1, %o0, %o0; \
++ENDPROC(atomic_##op##_return); \
++EXPORT_SYMBOL(atomic_##op##_return);
++
++#define ATOMIC_FETCH_OP(op) \
++ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \
++1: ld [%o1], %g1; \
++ op %g1, %o0, %g7; \
++ casa [%o1] 0xb, %g1, %g7; \
++ cmp %g1, %g7; \
++ bne 1b; \
++ nop; \
++ retl; \
++ mov %g1, %o0; \
++ENDPROC(atomic_fetch_##op); \
++EXPORT_SYMBOL(atomic_fetch_##op);
++
++ATOMIC_OP(add)
++ATOMIC_OP_RETURN(add)
++ATOMIC_FETCH_OP(add)
++
++ATOMIC_OP(sub)
++ATOMIC_OP_RETURN(sub)
++ATOMIC_FETCH_OP(sub)
++
++ATOMIC_OP(and)
++ATOMIC_FETCH_OP(and)
++
++ATOMIC_OP(or)
++ATOMIC_FETCH_OP(or)
++
++ATOMIC_OP(xor)
++ATOMIC_FETCH_OP(xor)
++
++#undef ATOMIC_FETCH_OP
++#undef ATOMIC_OP_RETURN
++#undef ATOMIC_OP
+diff --git a/arch/sparc/lib/bitops_cas_32.S b/arch/sparc/lib/bitops_cas_32.S
+new file mode 100644
+index 000000000000..184e212fa59f
+--- /dev/null
++++ b/arch/sparc/lib/bitops_cas_32.S
+@@ -0,0 +1,130 @@
++/* bitops_cas_32.S: Sparc32 atomic bit operations for LEON with CAS.
++ *
++ * Copyright (C) 2000, 2007 David S. Miller (davem@davemloft.net)
++ *
++ * Adaption for LEON with CAS from bitops.S, by Andreas Larsson
++ * (andreas@gaisler.com).
++ */
++
++#include <linux/linkage.h>
++#include <asm/asi.h>
++#include <asm/export.h>
++
++ .text
++
++ENTRY(test_and_set_bit) /* %o0=nr, %o1=addr */
++ srl %o0, 5, %g1
++ mov 1, %o2
++ sll %g1, 2, %g3
++ and %o0, 31, %g2
++ sll %o2, %g2, %o2
++ add %o1, %g3, %o1
++1: ld [%o1], %g7
++ or %g7, %o2, %g1
++ casa [%o1] 0xb, %g7, %g1
++ cmp %g7, %g1
++ bne 1b
++ clr %o0
++ andcc %g7, %o2, %g2
++ bne,a 2f
++ mov 1, %o0
++2: retl
++ nop
++ENDPROC(test_and_set_bit)
++EXPORT_SYMBOL(test_and_set_bit)
++
++ENTRY(test_and_clear_bit) /* %o0=nr, %o1=addr */
++ srl %o0, 5, %g1
++ mov 1, %o2
++ sll %g1, 2, %g3
++ and %o0, 31, %g2
++ sll %o2, %g2, %o2
++ add %o1, %g3, %o1
++1: ld [%o1], %g7
++ andn %g7, %o2, %g1
++ casa [%o1] 0xb, %g7, %g1
++ cmp %g7, %g1
++ bne 1b
++ clr %o0
++ andcc %g7, %o2, %g2
++ bne,a 2f
++ mov 1, %o0
++2: retl
++ nop
++ENDPROC(test_and_clear_bit)
++EXPORT_SYMBOL(test_and_clear_bit)
++
++ENTRY(test_and_change_bit) /* %o0=nr, %o1=addr */
++ srl %o0, 5, %g1
++ mov 1, %o2
++ sll %g1, 2, %g3
++ and %o0, 31, %g2
++ sll %o2, %g2, %o2
++ add %o1, %g3, %o1
++1: ld [%o1], %g7
++ xor %g7, %o2, %g1
++ casa [%o1] 0xb, %g7, %g1
++ cmp %g7, %g1
++ bne 1b
++ clr %o0
++ andcc %g7, %o2, %g2
++ bne,a 2f
++ mov 1, %o0
++2: retl
++ nop
++ENDPROC(test_and_change_bit)
++EXPORT_SYMBOL(test_and_change_bit)
++
++ENTRY(set_bit) /* %o0=nr, %o1=addr */
++ srl %o0, 5, %g1
++ mov 1, %o2
++ sll %g1, 2, %g3
++ and %o0, 31, %g2
++ sll %o2, %g2, %o2
++ add %o1, %g3, %o1
++1: ld [%o1], %g7
++ or %g7, %o2, %g1
++ casa [%o1] 0xb, %g7, %g1
++ cmp %g7, %g1
++ bne 1b
++ nop
++ retl
++ nop
++ENDPROC(set_bit)
++EXPORT_SYMBOL(set_bit)
++
++ENTRY(clear_bit) /* %o0=nr, %o1=addr */
++ srl %o0, 5, %g1
++ mov 1, %o2
++ sll %g1, 2, %g3
++ and %o0, 31, %g2
++ sll %o2, %g2, %o2
++ add %o1, %g3, %o1
++1: ld [%o1], %g7
++ andn %g7, %o2, %g1
++ casa [%o1] 0xb, %g7, %g1
++ cmp %g7, %g1
++ bne 1b
++ nop
++ retl
++ nop
++ENDPROC(clear_bit)
++EXPORT_SYMBOL(clear_bit)
++
++ENTRY(change_bit) /* %o0=nr, %o1=addr */
++ srl %o0, 5, %g1
++ mov 1, %o2
++ sll %g1, 2, %g3
++ and %o0, 31, %g2
++ sll %o2, %g2, %o2
++ add %o1, %g3, %o1
++1: ld [%o1], %g7
++ xor %g7, %o2, %g1
++ casa [%o1] 0xb, %g7, %g1
++ cmp %g7, %g1
++ bne 1b
++ nop
++ retl
++ nop
++ENDPROC(change_bit)
++EXPORT_SYMBOL(change_bit)
+--
+2.34.1
+
--- /dev/null
+From aa5a076de52f8eae1ad76465c57f0168ebaa43a1 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Wed, 20 Sep 2017 11:13:51 +0200
+Subject: [PATCH 03/32] sparc32,leon: Require CAS for SMP
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/Kconfig | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
+index 178496eff3de..2099e1886450 100644
+--- a/arch/sparc/Kconfig
++++ b/arch/sparc/Kconfig
+@@ -162,6 +162,7 @@ menu "Processor type and features"
+
+ config SMP
+ bool "Symmetric multi-processing support"
++ select SPARC_LEON_CAS if SPARC_LEON
+ help
+ This enables support for systems with more than one CPU. If you have
+ a system with only one CPU, say N. If you have a system with more
+@@ -182,6 +183,8 @@ config SMP
+
+ If you don't know what to do here, say N.
+
++ Enables usage of CAS on LEON systems.
++
+ config NR_CPUS
+ int "Maximum number of CPUs"
+ depends on SMP
+--
+2.34.1
+
--- /dev/null
+From 71be356f33277110d573628121f0c6b27335e184 Mon Sep 17 00:00:00 2001
+From: Daniel Hellstrom <daniel@gaisler.com>
+Date: Wed, 18 Feb 2015 11:48:42 +0100
+Subject: [PATCH 04/32] sparc: optimize MMU fault trap entry
+
+Signed-off-by: Daniel Hellstrom <daniel@gaisler.com>
+---
+ arch/sparc/kernel/entry.S | 24 ++++++++++--------------
+ arch/sparc/mm/fault_32.c | 3 ---
+ 2 files changed, 10 insertions(+), 17 deletions(-)
+
+diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
+index d58940280f8d..d176493ea0b5 100644
+--- a/arch/sparc/kernel/entry.S
++++ b/arch/sparc/kernel/entry.S
+@@ -775,23 +775,19 @@ SUN_PI_(lda [%l5] ASI_M_MMUREGS, %l6) ! read sfar first
+ LEON_PI(lda [%l4] ASI_LEON_MMUREGS, %l5) ! read sfsr last
+ SUN_PI_(lda [%l4] ASI_M_MMUREGS, %l5) ! read sfsr last
+
+- andn %l6, 0xfff, %l6
+- srl %l5, 6, %l5 ! and encode all info into l7
+-
+- and %l5, 2, %l5
+- or %l5, %l6, %l6
+-
+- or %l6, %l7, %l7 ! l7 = [addr,write,txtfault]
++ and %l5, 0x0e0, %l5 ! save AT
++ andn %l6, 0xfff, %l6 ! save faulting page
++ or %l5, %l6, %l6 ! and encode all info into l7
++ or %l6, %l7, %l7 ! l7 = [addr,AT,txtfault]
+
+ SAVE_ALL
+
+- mov %l7, %o1
+- mov %l7, %o2
+- and %o1, 1, %o1 ! arg2 = text_faultp
+- mov %l7, %o3
+- and %o2, 2, %o2 ! arg3 = writep
+- andn %o3, 0xfff, %o3 ! arg4 = faulting address
+-
++ andcc %l7, 1, %o1 ! arg2 = text_faultp
++ and %l7, 0x80, %o2 ! arg3 = writep from AT
++ bne 1f
++ mov %l1, %o3 ! arg4 = faulting TEXT address (PC)
++ andn %l7, 0xfff, %o3 ! arg4 = faulting DATA address
++1:
+ wr %l0, PSR_ET, %psr
+ WRITE_PAUSE
+
+diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
+index 40ce087dfecf..e2c3257dfdab 100644
+--- a/arch/sparc/mm/fault_32.c
++++ b/arch/sparc/mm/fault_32.c
+@@ -169,9 +169,6 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
+ vm_fault_t fault;
+ unsigned int flags = FAULT_FLAG_DEFAULT;
+
+- if (text_fault)
+- address = regs->pc;
+-
+ /*
+ * We fault-in kernel-space virtual memory on-demand. The
+ * 'reference' page table is init_mm.pgd.
+--
+2.34.1
+
--- /dev/null
+From a8318758079240c1e3f08047dbf25730d9e1b952 Mon Sep 17 00:00:00 2001
+From: Daniel Hellstrom <daniel@gaisler.com>
+Date: Wed, 18 Feb 2015 11:49:02 +0100
+Subject: [PATCH 05/32] sparc,leon: workaround for MMU errata
+
+In some cases the FSR/FAR MMU registers are not updated according
+to the MMU data fault trap that occurs. This fix makes an extra
+check to determine if a data fault trap could have been caused by
+an instruction fault.
+
+This is typically triggered when the branch prediction unit is
+enabled and it predicts to jump to a page marked invalid.
+
+Signed-off-by: Daniel Hellstrom <daniel@gaisler.com>
+---
+ arch/sparc/kernel/entry.S | 11 +++++++++++
+ arch/sparc/mm/leon_mm.c | 2 ++
+ 2 files changed, 13 insertions(+)
+
+diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
+index d176493ea0b5..eaefd7b4961e 100644
+--- a/arch/sparc/kernel/entry.S
++++ b/arch/sparc/kernel/entry.S
+@@ -786,7 +786,18 @@ SUN_PI_(lda [%l4] ASI_M_MMUREGS, %l5) ! read sfsr last
+ and %l7, 0x80, %o2 ! arg3 = writep from AT
+ bne 1f
+ mov %l1, %o3 ! arg4 = faulting TEXT address (PC)
++
++#ifdef CONFIG_SPARC_LEON
++ /* LEON errata where FSR must be read to determine text/data trap */
++ and %l7, 0xc0, %o4
++ cmp %o4, 0x40
++ bne 1f
++ andn %l7, 0xfff, %o3 ! arg4 = faulting DATA address
++ mov 1, %o1 ! Force text fault even if got DFAULT
++ ! arg4 = Leave o3 to be DATA address
++#else
+ andn %l7, 0xfff, %o3 ! arg4 = faulting DATA address
++#endif
+ 1:
+ wr %l0, PSR_ET, %psr
+ WRITE_PAUSE
+diff --git a/arch/sparc/mm/leon_mm.c b/arch/sparc/mm/leon_mm.c
+index ec61ff1f96b7..1bb2266d0b80 100644
+--- a/arch/sparc/mm/leon_mm.c
++++ b/arch/sparc/mm/leon_mm.c
+@@ -15,6 +15,7 @@
+ #include <asm/asi.h>
+ #include <asm/leon.h>
+ #include <asm/tlbflush.h>
++#include <asm/pgtsrmmu.h>
+
+ #include "mm_32.h"
+
+@@ -269,6 +270,7 @@ int __init leon_flush_needed(void)
+
+ void leon_switch_mm(void)
+ {
++ srmmu_get_fstatus(); /* errata, must clear FSR.OW to trust next fault */
+ flush_tlb_mm((void *)0);
+ if (leon_flush_during_switch)
+ leon_flush_cache_all();
+--
+2.34.1
+
--- /dev/null
+From f450d7aa15f5ed9b5fca725ba0151967583aebc4 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Wed, 29 Apr 2015 14:05:30 +0200
+Subject: [PATCH 06/32] sparc: leon: Distinguish between IRQMP and IRQAMP
+ interrupt controllers
+
+This makes sure that the icsel[] registers are only used when having
+multiple interrupt controllers in the interrupt controller core.
+
+When trying to read the icsel[0] register on an IRQMP controller, the
+pending register is actually read, which is not necessarily zero. Trying
+to read the ampctrl register to figure out the number of interrupt
+controllers will wrap to the interrupt level register on an IRQMP core,
+which contains zeroes in bits 31-28 and will thus be interpreted correctly.
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/include/asm/leon_amba.h | 1 +
+ arch/sparc/kernel/leon_kernel.c | 11 ++++++++---
+ 2 files changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/arch/sparc/include/asm/leon_amba.h b/arch/sparc/include/asm/leon_amba.h
+index 6433a93f5126..29f6952b23cf 100644
+--- a/arch/sparc/include/asm/leon_amba.h
++++ b/arch/sparc/include/asm/leon_amba.h
+@@ -81,6 +81,7 @@ struct amba_prom_registers {
+
+ #define LEON3_IRQMPSTATUS_CPUNR 28
+ #define LEON3_IRQMPSTATUS_BROADCAST 27
++#define LEON3_IRQMPAMPCTRL_NCTRL 28
+
+ #define GPTIMER_CONFIG_IRQNT(a) (((a) >> 3) & 0x1f)
+ #define GPTIMER_CONFIG_ISSEP(a) ((a) & (1 << 8))
+diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
+index 39229940d725..779a0100bf8c 100644
+--- a/arch/sparc/kernel/leon_kernel.c
++++ b/arch/sparc/kernel/leon_kernel.c
+@@ -313,6 +313,7 @@ void __init leon_init_timers(void)
+ int err;
+ u32 config;
+ u32 ctrl;
++ u32 nirqctrl;
+
+ sparc_config.get_cycles_offset = leon_cycles_offset;
+ sparc_config.cs_period = 1000000 / HZ;
+@@ -410,9 +411,13 @@ void __init leon_init_timers(void)
+ * accessed anyway.
+ * In AMP systems, Linux must run on CPU0 for the time being.
+ */
+- icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[boot_cpu_id/8]);
+- icsel = (icsel >> ((7 - (boot_cpu_id&0x7)) * 4)) & 0xf;
+- leon3_irqctrl_regs += icsel;
++ nirqctrl = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->ampctrl) >>
++ LEON3_IRQMPAMPCTRL_NCTRL;
++ if (nirqctrl) {
++ icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[boot_cpu_id/8]);
++ icsel = (icsel >> ((7 - (boot_cpu_id&0x7)) * 4)) & 0xf;
++ leon3_irqctrl_regs += icsel;
++ }
+
+ /* Mask all IRQs on boot-cpu IRQ controller */
+ LEON3_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[boot_cpu_id], 0);
+--
+2.34.1
+
--- /dev/null
+From fb4b064cd33812542ac2da66bf9d43de6adbc546 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Fri, 8 Sep 2017 15:19:56 +0200
+Subject: [PATCH 07/32] sparc32,leon: Flush TLBs of other CPUs using crosscall
+ on SMP
+
+LEON systems do not have hardware broadcast for TLB flushes (unlike Sun4d).
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/mm/leon_mm.c | 2 +-
+ arch/sparc/mm/srmmu.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/sparc/mm/leon_mm.c b/arch/sparc/mm/leon_mm.c
+index 1bb2266d0b80..d894a57a6ab3 100644
+--- a/arch/sparc/mm/leon_mm.c
++++ b/arch/sparc/mm/leon_mm.c
+@@ -271,7 +271,7 @@ int __init leon_flush_needed(void)
+ void leon_switch_mm(void)
+ {
+ srmmu_get_fstatus(); /* errata, must clear FSR.OW to trust next fault */
+- flush_tlb_mm((void *)0);
++ leon_flush_tlb_all();
+ if (leon_flush_during_switch)
+ leon_flush_cache_all();
+ }
+diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
+index 0070f8b9a753..4ab2e43e93a1 100644
+--- a/arch/sparc/mm/srmmu.c
++++ b/arch/sparc/mm/srmmu.c
+@@ -1796,7 +1796,7 @@ void __init load_mmu(void)
+ /* El switcheroo... */
+ local_ops = sparc32_cachetlb_ops;
+
+- if (sparc_cpu_model == sun4d || sparc_cpu_model == sparc_leon) {
++ if (sparc_cpu_model == sun4d) {
+ smp_cachetlb_ops.tlb_all = local_ops->tlb_all;
+ smp_cachetlb_ops.tlb_mm = local_ops->tlb_mm;
+ smp_cachetlb_ops.tlb_range = local_ops->tlb_range;
+--
+2.34.1
+
--- /dev/null
+From 69afe90477a2c09fc6340434249653b6efb91f2e Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Thu, 4 Feb 2021 17:23:41 +0100
+Subject: [PATCH 08/32] sparc32: Preserve clone syscall flags argument for
+ restarts due to signals
+
+This fixes a bug where a clone syscall that is restarted due to a
+pending signal is restarted with garbage in the register %o0 that holds
+the clone flags.
+
+This keeps the original %i0 of a syscall (as seen from the trap handler)
+in %l6 rather than %l5. This is done because for clone (and also qfork)
+%l5 is used as a temporary variable in the same register window. Before
+this, that temporary value would be the value that was then incorrectly
+used as the orig_i0 argument to do_notify_resume.
+
+In order to preserve %l6, the temporary usage of %l6 in ret_sys_call is
+changed to use %l5 instead and the setting of %l6 to 0 or 1 is removed.
+The use of that 0 or 1 value in %l6 was removed in commit
+28e6103665301ce60634e8a77f0b657c6cc099de.
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ arch/sparc/kernel/entry.S | 8 +++-----
+ arch/sparc/kernel/rtrap_32.S | 2 +-
+ 2 files changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
+index eaefd7b4961e..b56f489e38bb 100644
+--- a/arch/sparc/kernel/entry.S
++++ b/arch/sparc/kernel/entry.S
+@@ -1001,7 +1001,7 @@ do_syscall:
+ andcc %l5, _TIF_SYSCALL_TRACE, %g0
+ mov %i4, %o4
+ bne linux_syscall_trace
+- mov %i0, %l5
++ mov %i0, %l6
+ 2:
+ call %l7
+ mov %i5, %o5
+@@ -1010,16 +1010,15 @@ do_syscall:
+ st %o0, [%sp + STACKFRAME_SZ + PT_I0]
+
+ ret_sys_call:
+- ld [%curptr + TI_FLAGS], %l6
++ ld [%curptr + TI_FLAGS], %l5
+ cmp %o0, -ERESTART_RESTARTBLOCK
+ ld [%sp + STACKFRAME_SZ + PT_PSR], %g3
+ set PSR_C, %g2
+ bgeu 1f
+- andcc %l6, _TIF_SYSCALL_TRACE, %g0
++ andcc %l5, _TIF_SYSCALL_TRACE, %g0
+
+ /* System call success, clear Carry condition code. */
+ andn %g3, %g2, %g3
+- clr %l6
+ st %g3, [%sp + STACKFRAME_SZ + PT_PSR]
+ bne linux_syscall_trace2
+ ld [%sp + STACKFRAME_SZ + PT_NPC], %l1 /* pc = npc */
+@@ -1034,7 +1033,6 @@ ret_sys_call:
+ sub %g0, %o0, %o0
+ or %g3, %g2, %g3
+ st %o0, [%sp + STACKFRAME_SZ + PT_I0]
+- mov 1, %l6
+ st %g3, [%sp + STACKFRAME_SZ + PT_PSR]
+ bne linux_syscall_trace2
+ ld [%sp + STACKFRAME_SZ + PT_NPC], %l1 /* pc = npc */
+diff --git a/arch/sparc/kernel/rtrap_32.S b/arch/sparc/kernel/rtrap_32.S
+index dca8ed810046..8931fe266346 100644
+--- a/arch/sparc/kernel/rtrap_32.S
++++ b/arch/sparc/kernel/rtrap_32.S
+@@ -75,7 +75,7 @@ signal_p:
+ ld [%sp + STACKFRAME_SZ + PT_PSR], %t_psr
+
+ mov %g2, %o2
+- mov %l5, %o1
++ mov %l6, %o1
+ call do_notify_resume
+ add %sp, STACKFRAME_SZ, %o0 ! pt_regs ptr
+
+--
+2.34.1
+
--- /dev/null
+From d71d7e1d60c841b1d8fc574f5be03af2936ee5ea Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Wed, 9 Jun 2021 14:40:01 +0200
+Subject: [PATCH 09/32] sparc32: Synchronize user stack on fork and clone
+
+Without doing this, an irq 15 IPI taken while in the kernel can catch a
+user space frame pointer and put a register window in the thread_info
+register window array, leading to the KERNEL_WINDOW_FLUSH in
+sys_clone/sys_fork/sys_vfork not completely flushing all windows to the
+stack, thereby leading to a failed clone/fork/vfork.
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/kernel/process.c | 40 +++++++++++++++++++++++--------------
+ 1 file changed, 25 insertions(+), 15 deletions(-)
+
+diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c
+index 0442ab00518d..af1d66124485 100644
+--- a/arch/sparc/kernel/process.c
++++ b/arch/sparc/kernel/process.c
+@@ -17,14 +17,18 @@
+
+ asmlinkage long sparc_fork(struct pt_regs *regs)
+ {
+- unsigned long orig_i1 = regs->u_regs[UREG_I1];
++ unsigned long orig_i1;
+ long ret;
+ struct kernel_clone_args args = {
+ .exit_signal = SIGCHLD,
+- /* Reuse the parent's stack for the child. */
+- .stack = regs->u_regs[UREG_FP],
+ };
+
++ synchronize_user_stack();
++
++ orig_i1 = regs->u_regs[UREG_I1];
++ /* Reuse the parent's stack for the child. */
++ args.stack = regs->u_regs[UREG_FP];
++
+ ret = kernel_clone(&args);
+
+ /* If we get an error and potentially restart the system
+@@ -40,16 +44,19 @@ asmlinkage long sparc_fork(struct pt_regs *regs)
+
+ asmlinkage long sparc_vfork(struct pt_regs *regs)
+ {
+- unsigned long orig_i1 = regs->u_regs[UREG_I1];
++ unsigned long orig_i1;
+ long ret;
+-
+- struct kernel_clone_args args = {
++ struct kernel_clone_args args= {
+ .flags = CLONE_VFORK | CLONE_VM,
+ .exit_signal = SIGCHLD,
+- /* Reuse the parent's stack for the child. */
+- .stack = regs->u_regs[UREG_FP],
+ };
+
++ synchronize_user_stack();
++
++ orig_i1 = regs->u_regs[UREG_I1];
++ /* Reuse the parent's stack for the child. */
++ args.stack = regs->u_regs[UREG_FP];
++
+ ret = kernel_clone(&args);
+
+ /* If we get an error and potentially restart the system
+@@ -65,15 +72,18 @@ asmlinkage long sparc_vfork(struct pt_regs *regs)
+
+ asmlinkage long sparc_clone(struct pt_regs *regs)
+ {
+- unsigned long orig_i1 = regs->u_regs[UREG_I1];
+- unsigned int flags = lower_32_bits(regs->u_regs[UREG_I0]);
++ unsigned long orig_i1;
++ unsigned int flags;
+ long ret;
++ struct kernel_clone_args args = {0};
+
+- struct kernel_clone_args args = {
+- .flags = (flags & ~CSIGNAL),
+- .exit_signal = (flags & CSIGNAL),
+- .tls = regs->u_regs[UREG_I3],
+- };
++ synchronize_user_stack();
++
++ orig_i1 = regs->u_regs[UREG_I1];
++ flags = lower_32_bits(regs->u_regs[UREG_I0]);
++ args.flags = (flags & ~CSIGNAL);
++ args.exit_signal = (flags & CSIGNAL);
++ args.tls = regs->u_regs[UREG_I3];
+
+ #ifdef CONFIG_COMPAT
+ if (test_thread_flag(TIF_32BIT)) {
+--
+2.34.1
+
--- /dev/null
+From ce71c62289d17a6e7d809759f6b10297bb066d11 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Wed, 20 Sep 2017 09:10:46 +0200
+Subject: [PATCH 10/32] sparc32,leon: Handle self clearing FSR.ftt when saving
+ FPU registers
+
+GRFPU self clears the ftt field after a STFSR. Make sure that the ftt
+field is not lost in cases with multiple STFSRs.
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/kernel/entry.S | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
+index b56f489e38bb..87c68aeeb794 100644
+--- a/arch/sparc/kernel/entry.S
++++ b/arch/sparc/kernel/entry.S
+@@ -1062,6 +1062,7 @@ fpsave:
+ st %fsr, [%o1] ! this can trap on us if fpu is in bogon state
+ ld [%o1], %g1
+ set 0x2000, %g4
++ set 0x1c000, %g5
+ andcc %g1, %g4, %g0
+ be 2f
+ mov 0, %g2
+@@ -1072,6 +1073,10 @@ fpsave:
+ fpsave_magic:
+ st %fsr, [%o1]
+ ld [%o1], %g3
++ andn %g3, %g5, %g3 /* ftt gets cleared on stfsr for GRFPU... */
++ and %g1, %g5, %g1
++ or %g1, %g3, %g3
++ st %g3, [%o1] /* ... so set ftt field from first fsr read */
+ andcc %g3, %g4, %g0
+ add %g2, 1, %g2
+ bne 1b
+@@ -1108,8 +1113,9 @@ fpsave_catch:
+ st %fsr, [%o1]
+
+ fpsave_catch2:
++ st %fsr, [%o1] /* In this case, this is the first successful fsr read */
+ b fpsave + 4
+- st %fsr, [%o1]
++ ld [%o1], %g1 /* so take care of it for GRFPU ftt field handling */
+
+ /* void fpload(unsigned long *fpregs, unsigned long *fsr); */
+
+--
+2.34.1
+
--- /dev/null
+From 89c7c30cd55c04e32393ca8586e0564096c7a517 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Thu, 6 Jul 2017 10:30:27 +0200
+Subject: [PATCH 11/32] sparc32,leon: Fix description of SPARC_LEON config
+ option
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
+index 2099e1886450..b7aa802d3372 100644
+--- a/arch/sparc/Kconfig
++++ b/arch/sparc/Kconfig
+@@ -387,7 +387,7 @@ config SPARC_LEON
+ select USB_UHCI_BIG_ENDIAN_MMIO
+ select USB_UHCI_BIG_ENDIAN_DESC
+ help
+- If you say Y here if you are running on a SPARC-LEON processor.
++ Say Y here if you are running on a SPARC-LEON processor.
+ The LEON processor is a synthesizable VHDL model of the
+ SPARC-v8 standard. LEON is part of the GRLIB collection of
+ IP cores that are distributed under GPL. GRLIB can be downloaded
+--
+2.34.1
+
--- /dev/null
+From f9c156b099985740a2c521ff237ef70916edd160 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Thu, 22 Oct 2020 16:36:05 +0200
+Subject: [PATCH 12/32] sparc32,leon: Add cpuinfo for LEON5 and add fpuinfo for
+ GRFPU5 and NanoFPU
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/kernel/cpu.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
+index 79cd6ccfeac0..5eb0b28ae282 100644
+--- a/arch/sparc/kernel/cpu.c
++++ b/arch/sparc/kernel/cpu.c
+@@ -196,11 +196,14 @@ static const struct manufacturer_info __initconst manufacturer_info[] = {
+ PSR_IMPL_LEON, /* Aeroflex Gaisler */
+ .cpu_info = {
+ CPU(3, "LEON"),
++ CPU(5, "LEON5"),
+ CPU(-1, NULL)
+ },
+ .fpu_info = {
+ FPU(2, "GRFPU"),
+ FPU(3, "GRFPU-Lite"),
++ FPU(4, "GRFPU5"),
++ FPU(5, "NanoFPU"),
+ FPU(-1, NULL)
+ }
+ },{
+--
+2.34.1
+
--- /dev/null
+From 4c6fe52c1f496e3e170e745efe179b5bda97ed4b Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Wed, 20 Sep 2017 14:34:30 +0200
+Subject: [PATCH 13/32] greth: Do not reset GRETH when EDCL is active
+
+"Active" means that EDCL is available, EDCL is not disabled, and the
+kernel module parameter greth_edcl is not set to 0.
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ drivers/net/ethernet/aeroflex/greth.c | 43 ++++++++++++++++-----------
+ drivers/net/ethernet/aeroflex/greth.h | 4 +++
+ 2 files changed, 30 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
+index 0d56cb4f5dd9..e8dbdba69c9f 100644
+--- a/drivers/net/ethernet/aeroflex/greth.c
++++ b/drivers/net/ethernet/aeroflex/greth.c
+@@ -1318,7 +1318,7 @@ static int greth_mdio_init(struct greth_private *greth)
+ phy_start(ndev->phydev);
+
+ /* If Ethernet debug link is used make autoneg happen right away */
+- if (greth->edcl && greth_edcl == 1) {
++ if (greth->edcl) {
+ phy_start_aneg(ndev->phydev);
+ timeout = jiffies + 6*HZ;
+ while (!phy_aneg_done(ndev->phydev) &&
+@@ -1382,20 +1382,27 @@ static int greth_of_probe(struct platform_device *ofdev)
+ dev_set_drvdata(greth->dev, dev);
+ SET_NETDEV_DEV(dev, greth->dev);
+
+- if (netif_msg_probe(greth))
+- dev_dbg(greth->dev, "resetting controller.\n");
+-
+- /* Reset the controller. */
+- GRETH_REGSAVE(regs->control, GRETH_RESET);
++ /* Check if we have EDCL that is not disabled */
++ tmp = GRETH_REGLOAD(regs->control);
++ greth->have_edcl = !!(tmp & GRETH_CTRL_EA);
++ greth->edcl = greth->have_edcl && !(tmp & GRETH_CTRL_ED) && greth_edcl;
+
+- /* Wait for MAC to reset itself */
+- timeout = jiffies + HZ/100;
+- while (GRETH_REGLOAD(regs->control) & GRETH_RESET) {
+- if (time_after(jiffies, timeout)) {
+- err = -EIO;
+- if (netif_msg_probe(greth))
+- dev_err(greth->dev, "timeout when waiting for reset.\n");
+- goto error2;
++ if (!greth->edcl) {
++ if (netif_msg_probe(greth))
++ dev_dbg(greth->dev, "resetting controller.\n");
++
++ /* Reset the controller. */
++ GRETH_REGSAVE(regs->control, GRETH_RESET);
++
++ /* Wait for MAC to reset itself */
++ timeout = jiffies + HZ/100;
++ while (GRETH_REGLOAD(regs->control) & GRETH_RESET) {
++ if (time_after(jiffies, timeout)) {
++ err = -EIO;
++ if (netif_msg_probe(greth))
++ dev_err(greth->dev, "timeout when waiting for reset.\n");
++ goto error2;
++ }
+ }
+ }
+
+@@ -1409,13 +1416,15 @@ static int greth_of_probe(struct platform_device *ofdev)
+ /* Check for multicast capability */
+ greth->multicast = (tmp >> 25) & 1;
+
+- greth->edcl = (tmp >> 31) & 1;
+-
+ /* If we have EDCL we disable the EDCL speed-duplex FSM so
+ * it doesn't interfere with the software */
+- if (greth->edcl != 0)
++ if (greth->have_edcl)
+ GRETH_REGORIN(regs->control, GRETH_CTRL_DISDUPLEX);
+
++ /* Disable EDCL if it should not be used */
++ if (greth->have_edcl && !greth->edcl)
++ GRETH_REGORIN(regs->control, GRETH_CTRL_ED);
++
+ /* Check if MAC can handle MDIO interrupts */
+ greth->mdio_int_en = (tmp >> 26) & 1;
+
+diff --git a/drivers/net/ethernet/aeroflex/greth.h b/drivers/net/ethernet/aeroflex/greth.h
+index a1e04c9e932e..589dee5647db 100644
+--- a/drivers/net/ethernet/aeroflex/greth.h
++++ b/drivers/net/ethernet/aeroflex/greth.h
+@@ -16,6 +16,9 @@
+ #define GRETH_CTRL_PSTATIEN 0x400
+ #define GRETH_CTRL_MCEN 0x800
+ #define GRETH_CTRL_DISDUPLEX 0x1000
++#define GRETH_CTRL_ED 0x4000
++#define GRETH_CTRL_EA 0x80000000
++
+ #define GRETH_STATUS_PHYSTAT 0x100
+
+ #define GRETH_BD_EN 0x800
+@@ -135,6 +138,7 @@ struct greth_private {
+ u8 multicast;
+ u8 gbit_mac;
+ u8 mdio_int_en;
++ u8 have_edcl;
+ u8 edcl;
+ };
+
+--
+2.34.1
+
--- /dev/null
+From 5d82acbb9a8c98fe7eb86a59f3f1471a713f8c92 Mon Sep 17 00:00:00 2001
+From: Konrad Eisele <konrad@gaisler.com>
+Date: Tue, 10 Apr 2012 15:22:11 +0200
+Subject: [PATCH 14/32] sparc32,leon: prevent long uart fifo discovery loop
+
+On GR712 the UART might be disabled. To avoid spinning forever, limit the loop count.
+
+Signed-off-by: Konrad Eisele <konrad@gaisler.com>
+---
+ drivers/tty/serial/apbuart.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/tty/serial/apbuart.c b/drivers/tty/serial/apbuart.c
+index e8d56e899ec7..a22c48e5d217 100644
+--- a/drivers/tty/serial/apbuart.c
++++ b/drivers/tty/serial/apbuart.c
+@@ -363,7 +363,7 @@ static int apbuart_scan_fifo_size(struct uart_port *port, int portnumber)
+
+ UART_PUT_CTRL(port, ctrl | UART_CTRL_TE);
+
+- while (!UART_TX_READY(UART_GET_STATUS(port)))
++ while ((!UART_TX_READY(UART_GET_STATUS(port))) && loop < 0x7fffff)
+ loop++;
+
+ /*
+--
+2.34.1
+
--- /dev/null
+From 91bdb4d2a59aa7a7706c590ca98eb161c0751ee9 Mon Sep 17 00:00:00 2001
+From: Daniel Cederman <cederman@gaisler.com>
+Date: Wed, 10 Jun 2020 12:26:44 +0200
+Subject: [PATCH 15/32] sparc: Make PAGE_NONE a valid PTE
+
+Otherwise when exiting a program that has set a page to PROT_NONE
+we get error messages about the page having a bad swap file mapping.
+See for example struct-ret-3.c in GCC.
+---
+ arch/sparc/include/asm/pgtsrmmu.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h
+index 7708d015712b..117009b03cf4 100644
+--- a/arch/sparc/include/asm/pgtsrmmu.h
++++ b/arch/sparc/include/asm/pgtsrmmu.h
+@@ -74,7 +74,7 @@
+ * enforce all the protection levels that vma's can have.
+ * XXX But for now...
+ */
+-#define SRMMU_PAGE_NONE __pgprot(SRMMU_CACHE | \
++#define SRMMU_PAGE_NONE __pgprot(SRMMU_VALID | SRMMU_CACHE | \
+ SRMMU_PRIV | SRMMU_REF)
+ #define SRMMU_PAGE_SHARED __pgprot(SRMMU_VALID | SRMMU_CACHE | \
+ SRMMU_EXEC | SRMMU_WRITE | SRMMU_REF)
+--
+2.34.1
+
--- /dev/null
+From 2a43646d7a4090792f7fe38e093de84e4d0ece23 Mon Sep 17 00:00:00 2001
+From: Jan Andersson <jan@gaisler.com>
+Date: Tue, 19 Apr 2011 11:12:46 +0200
+Subject: [PATCH 16/32] spi: add support for aeroflex gaisler spimctrl
+
+This patch adds support for Aeroflex Gaisler SPI memory controller (SPIMCTRL).
+SPIMCTRL memory maps a SPI flash device into AMBA address space. The core
+also has a register interface where any command can be sent to the device.
+
+The controller is typically found on LEON/GRLIB SoCs.
+
+Tested on GR-LEON4-ITX development board.
+
+Signed-off-by: Jan Andersson <jan@gaisler.com>
+---
+ drivers/spi/Kconfig | 8 +
+ drivers/spi/Makefile | 1 +
+ drivers/spi/spi-spimctrl.c | 357 +++++++++++++++++++++++++++++++++++++
+ 3 files changed, 366 insertions(+)
+ create mode 100644 drivers/spi/spi-spimctrl.c
+
+diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
+index 4d98ce7571df..cdc42f31feef 100644
+--- a/drivers/spi/Kconfig
++++ b/drivers/spi/Kconfig
+@@ -57,6 +57,14 @@ config SPI_MEM
+
+ comment "SPI Master Controller Drivers"
+
++config SPI_SPIMCTRL
++ tristate "Aeroflex Gaisler SPI memory controller"
++ depends on SPARC_LEON
++ select SPI_BITBANG
++ help
++ This is the driver for Aeroflex Gaisler's SPI memory controller
++ (SPIMCTRL) found on some LEON/GRLIB SoCs.
++
+ config SPI_ALTERA
+ tristate "Altera SPI Controller"
+ select REGMAP_MMIO
+diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
+index 6fea5821662e..d7b1c7650e1a 100644
+--- a/drivers/spi/Makefile
++++ b/drivers/spi/Makefile
+@@ -14,6 +14,7 @@ obj-$(CONFIG_SPI_SPIDEV) += spidev.o
+ obj-$(CONFIG_SPI_LOOPBACK_TEST) += spi-loopback-test.o
+
+ # SPI master controller drivers (bus)
++obj-$(CONFIG_SPI_SPIMCTRL) += spi-spimctrl.o
+ obj-$(CONFIG_SPI_ALTERA) += spi-altera.o
+ obj-$(CONFIG_SPI_AR934X) += spi-ar934x.o
+ obj-$(CONFIG_SPI_ARMADA_3700) += spi-armada-3700.o
+diff --git a/drivers/spi/spi-spimctrl.c b/drivers/spi/spi-spimctrl.c
+new file mode 100644
+index 000000000000..4d025e54d9d3
+--- /dev/null
++++ b/drivers/spi/spi-spimctrl.c
+@@ -0,0 +1,357 @@
++/*
++ * Driver for Aeroflex Gaisler SPIMCTRL
++ *
++ * SPIMCTRL maps SPI flash devices in a read-only memory area and also provides
++ * a register interface that allows any SPI command to be sent. This driver only
++ * makes use of the register interface.
++ *
++ * Copyright (c) 2011 Jan Andersson <jan@gaisler.com>
++ *
++ * This driver is based on:
++ *
++ * Altera SPI driver
++ * Copyright (C) 2008 Thomas Chou <thomas@wytron.com.tw>
++ * which in turn was based on spi_s3c24xx.c, which is:
++ * Copyright (c) 2006 Ben Dooks
++ * Copyright (c) 2006 Simtec Electronics
++ * Ben Dooks <ben@simtec.co.uk>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <linux/errno.h>
++#include <linux/platform_device.h>
++#include <linux/spi/spi.h>
++#include <linux/spi/spi_bitbang.h>
++#include <linux/io.h>
++#include <linux/of_irq.h>
++
++#define DRV_NAME "grlib-spimctrl"
++
++/* Core has one chip-select only */
++#define GR_SPIM_NUMCS 1
++
++/* Register offsets */
++#define GR_SPIM_CTRL 0x04
++#define GR_SPIM_STAT 0x08
++#define GR_SPIM_RX 0x0C
++#define GR_SPIM_TX 0x10
++
++/* Register fields */
++#define GR_SPIM_CTRL_CSN (1 << 3)
++#define GR_SPIM_CTRL_IEN (1 << 1)
++#define GR_SPIM_CTRL_USRC (1 << 0)
++
++#define GR_SPIM_STAT_BUSY (1 << 1)
++#define GR_SPIM_STAT_DONE (1 << 0)
++
++
++struct gr_spimctrl {
++ /* bitbang has to be first */
++ struct spi_bitbang bitbang;
++ struct completion done;
++
++ void __iomem *base;
++ int irq;
++ int len;
++ int count;
++ u32 ctrl;
++
++ /* data buffers */
++ const unsigned char *tx;
++ unsigned char *rx;
++};
++
++static inline void gr_spim_write(u32 val, void __iomem *addr)
++{
++ iowrite32be(val, addr);
++}
++
++static inline u32 gr_spim_read(void __iomem *addr)
++{
++ return ioread32be(addr);
++}
++
++
++static inline struct gr_spimctrl *gr_spimctrl_spi_to_hw(struct spi_device *sdev)
++{
++ return spi_master_get_devdata(sdev->master);
++}
++
++static void gr_spimctrl_chipsel(struct spi_device *spi, int value)
++{
++ struct gr_spimctrl *hw = gr_spimctrl_spi_to_hw(spi);
++ u32 ctrl = hw->ctrl;
++
++ if (spi->mode & SPI_CS_HIGH) {
++ switch (value) {
++ case BITBANG_CS_INACTIVE:
++ hw->ctrl &= ~GR_SPIM_CTRL_CSN;
++ break;
++
++ case BITBANG_CS_ACTIVE:
++ hw->ctrl |= GR_SPIM_CTRL_CSN;
++ break;
++ }
++ } else {
++ switch (value) {
++ case BITBANG_CS_INACTIVE:
++ hw->ctrl |= GR_SPIM_CTRL_CSN;
++ break;
++
++ case BITBANG_CS_ACTIVE:
++ hw->ctrl &= ~GR_SPIM_CTRL_CSN;
++ break;
++ }
++ }
++ if (ctrl != hw->ctrl)
++ gr_spim_write(hw->ctrl, hw->base + GR_SPIM_CTRL);
++}
++
++static int gr_spimctrl_setupxfer(struct spi_device *spi, struct spi_transfer *t)
++{
++ /* the controller does not support mode changes so we just ignore them.
++ * we can assume that the controller is attached to a memory device and
++ * that the controller can communicate with this device.
++ */
++
++ if (t && t->bits_per_word % 8)
++ return -EINVAL;
++
++ if (spi->bits_per_word % 8)
++ return -EINVAL;
++
++ if (spi->chip_select > GR_SPIM_NUMCS)
++ return -EINVAL;
++
++ return 0;
++}
++
++static int gr_spimctrl_setup(struct spi_device *spi)
++{
++ return gr_spimctrl_setupxfer(spi, NULL);
++}
++
++static void gr_spimctrl_cleanup(struct spi_device *spi)
++{
++ struct gr_spimctrl *hw = gr_spimctrl_spi_to_hw(spi);
++
++ hw->ctrl &= ~GR_SPIM_CTRL_USRC;
++ gr_spim_write(hw->ctrl, hw->base + GR_SPIM_CTRL);
++}
++
++static int gr_spimctrl_txrx(struct spi_device *spi, struct spi_transfer *t)
++{
++ struct gr_spimctrl *hw = gr_spimctrl_spi_to_hw(spi);
++
++ hw->tx = t->tx_buf;
++ hw->rx = t->rx_buf;
++ hw->count = 0;
++ hw->len = t->len;
++
++ if (hw->irq != NO_IRQ) {
++ /* interrupt driven transfer, send the first byte */
++ gr_spim_write(GR_SPIM_STAT_DONE, hw->base + GR_SPIM_STAT);
++ gr_spim_write(hw->tx ? *hw->tx++ : 0, hw->base + GR_SPIM_TX);
++ wait_for_completion(&hw->done);
++ } else {
++ /* polling */
++ do {
++ /* clear done bit, transmit, wait for receive .. */
++ gr_spim_write(GR_SPIM_STAT_DONE,
++ hw->base + GR_SPIM_STAT);
++
++ gr_spim_write(hw->tx ? *hw->tx++ : 0,
++ hw->base + GR_SPIM_TX);
++
++ while (!(gr_spim_read(hw->base + GR_SPIM_STAT) &
++ GR_SPIM_STAT_DONE))
++ cpu_relax();
++
++ if (hw->rx)
++ hw->rx[hw->count] =
++ gr_spim_read(hw->base + GR_SPIM_RX);
++
++ hw->count++;
++ } while (hw->count < hw->len);
++ }
++
++ return hw->count;
++}
++
++static irqreturn_t gr_spimctrl_irq(int irq, void *dev)
++{
++ struct gr_spimctrl *hw = dev;
++ u32 rxd;
++
++ if (!(gr_spim_read(hw->base + GR_SPIM_STAT) & GR_SPIM_STAT_DONE))
++ return IRQ_NONE;
++
++ if (hw->rx) {
++ rxd = gr_spim_read(hw->base + GR_SPIM_RX);
++ hw->rx[hw->count] = rxd;
++ }
++
++ hw->count++;
++
++ gr_spim_write(GR_SPIM_STAT_DONE, hw->base + GR_SPIM_STAT);
++
++ if (hw->count < hw->len)
++ gr_spim_write(hw->tx ? *hw->tx++ : 0, hw->base + GR_SPIM_TX);
++ else
++ complete(&hw->done);
++
++ return IRQ_HANDLED;
++}
++
++static int gr_spimctrl_probe(struct platform_device *pdev)
++{
++ struct gr_spimctrl *hw;
++ struct spi_master *master;
++ int err = -ENODEV;
++ struct resource *r;
++ u32 status;
++
++ master = spi_alloc_master(&pdev->dev, sizeof(struct gr_spimctrl));
++ if (!master)
++ return err;
++
++ /* setup the master state */
++ master->bus_num = pdev->id;
++ master->num_chipselect = GR_SPIM_NUMCS;
++ master->mode_bits = SPI_CS_HIGH;
++ master->setup = gr_spimctrl_setup;
++ master->cleanup = gr_spimctrl_cleanup;
++
++ hw = spi_master_get_devdata(master);
++ platform_set_drvdata(pdev, hw);
++
++ /* setup the state for the bitbang driver */
++ hw->bitbang.master = spi_master_get(master);
++ if (!hw->bitbang.master)
++ goto exit;
++ hw->bitbang.master->dev.of_node = of_node_get(pdev->dev.of_node);
++
++ hw->bitbang.setup_transfer = gr_spimctrl_setupxfer;
++ hw->bitbang.chipselect = gr_spimctrl_chipsel;
++ hw->bitbang.txrx_bufs = gr_spimctrl_txrx;
++
++ /* find and map our resources */
++ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++ hw->base = of_ioremap(r, 0, resource_size(r), "ag-spimctrl regs");
++ if (!hw->base) {
++ err = -EBUSY;
++ goto exit;
++ }
++
++ /* check current hw state. if controller is busy, leave it alone */
++ status = gr_spim_read(hw->base + GR_SPIM_STAT);
++ if (status & GR_SPIM_STAT_BUSY) {
++ err = -EBUSY;
++ goto exit_iounmap;
++ }
++
++ /* save control register value to keep settings */
++ hw->ctrl = gr_spim_read(hw->base + GR_SPIM_CTRL);
++
++ /* irq is optional */
++ hw->irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
++ if (hw->irq != NO_IRQ) {
++ init_completion(&hw->done);
++ err = request_irq(hw->irq, gr_spimctrl_irq, IRQF_SHARED,
++ pdev->name, hw);
++ if (err)
++ goto exit_iounmap;
++ /* enable interrupt, written to hw below */
++ hw->ctrl |= GR_SPIM_CTRL_IEN;
++ }
++
++ /* enter user mode so SPI comm. can be done via reg. interface */
++ if (!(hw->ctrl & GR_SPIM_CTRL_USRC)) {
++ hw->ctrl |= GR_SPIM_CTRL_USRC;
++ gr_spim_write(hw->ctrl, hw->base + GR_SPIM_CTRL);
++ }
++
++ /* register our spi controller */
++ err = spi_bitbang_start(&hw->bitbang);
++ if (err)
++ goto exit_iounmap;
++
++ dev_info(&pdev->dev, "base at 0x%p, irq %d, bus %d\n",
++ hw->base, hw->irq, master->bus_num);
++
++ return 0;
++
++exit_iounmap:
++ of_iounmap(r, hw->base, resource_size(r));
++exit:
++ spi_master_put(master);
++ platform_set_drvdata(pdev, NULL);
++ return err;
++}
++
++static int gr_spimctrl_remove(struct platform_device *pdev)
++{
++ struct gr_spimctrl *hw = platform_get_drvdata(pdev);
++ struct spi_master *master = hw->bitbang.master;
++ struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++
++ spi_bitbang_stop(&hw->bitbang);
++
++ /* bring hw out of user mode */
++ hw->ctrl &= ~GR_SPIM_CTRL_USRC;
++ gr_spim_write(hw->ctrl, hw->base + GR_SPIM_CTRL);
++
++ spi_master_put(master);
++
++ if (hw->irq != NO_IRQ)
++ free_irq(hw->irq, hw);
++ of_iounmap(r, hw->base, resource_size(r));
++
++ platform_set_drvdata(pdev, NULL);
++
++ return 0;
++}
++
++#ifdef CONFIG_OF
++static const struct of_device_id gr_spimctrl_of_match[] = {
++ { .name = "GAISLER_SPIMCTRL",},
++ { .name = "01_045",},
++ {},
++};
++MODULE_DEVICE_TABLE(of, gr_spimctrl_of_match);
++#else /* CONFIG_OF */
++#define gr_spimctrl_of_match NULL
++#endif /* CONFIG_OF */
++
++static struct platform_driver gr_spimctrl_driver = {
++ .probe = gr_spimctrl_probe,
++ .remove = gr_spimctrl_remove,
++ .driver = {
++ .name = DRV_NAME,
++ .owner = THIS_MODULE,
++ .pm = NULL,
++ .of_match_table = gr_spimctrl_of_match,
++ },
++};
++
++static int __init gr_spimctrl_init(void)
++{
++ return platform_driver_register(&gr_spimctrl_driver);
++}
++module_init(gr_spimctrl_init);
++
++static void __exit gr_spimctrl_exit(void)
++{
++ platform_driver_unregister(&gr_spimctrl_driver);
++}
++module_exit(gr_spimctrl_exit);
++
++MODULE_DESCRIPTION("Aeroflex Gaisler GRLIB SPIMCTRL driver");
++MODULE_AUTHOR("Jan Andersson <jan@gaisler.com>");
++MODULE_LICENSE("GPL");
+--
+2.34.1
+
--- /dev/null
+From 72f84f6598168f11c9c5d17ccbd9a5ce66868cbb Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Tue, 22 Jun 2021 18:25:12 +0200
+Subject: [PATCH 17/32] tty/serial/apbuart: Clear error and break bits at probe
+ time
+
+Otherwise old bits lying around can cause trouble, such as a stale break
+bit triggering sysrq at the first input even though the break condition
+was ancient.
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ drivers/tty/serial/apbuart.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/tty/serial/apbuart.c b/drivers/tty/serial/apbuart.c
+index a22c48e5d217..385d58c2cf04 100644
+--- a/drivers/tty/serial/apbuart.c
++++ b/drivers/tty/serial/apbuart.c
+@@ -564,6 +564,9 @@ static int apbuart_probe(struct platform_device *op)
+
+ apbuart_flush_fifo((struct uart_port *) port);
+
++ /* Clear any errors or breaks */
++ UART_PUT_STATUS(port, 0);
++
+ printk(KERN_INFO "grlib-apbuart at 0x%llx, irq %d\n",
+ (unsigned long long) port->mapbase, port->irq);
+ return 0;
+--
+2.34.1
+
--- /dev/null
+From 09d229f9175b4b204aad3abda807034a87ff1e59 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Wed, 23 Jun 2021 11:56:56 +0200
+Subject: [PATCH 18/32] spi: spi-fsl-spi: Make slvsel register chipselects
+ adhere to SPI_CS_HIGH again
+
+In the process of letting gpiolib handle GPIO chipselects, this support
+was lost.
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ drivers/spi/spi-fsl-spi.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
+index bdf94cc7be1a..8e78a5cdbb8d 100644
+--- a/drivers/spi/spi-fsl-spi.c
++++ b/drivers/spi/spi-fsl-spi.c
+@@ -560,8 +560,11 @@ static void fsl_spi_grlib_cs_control(struct spi_device *spi, bool on)
+ if (spi->cs_gpiod) {
+ gpiod_set_value(spi->cs_gpiod, on);
+ } else if (cs < mpc8xxx_spi->native_chipselects) {
++ bool ahigh = spi->mode & SPI_CS_HIGH;
++ bool setbit = on ? ahigh : !ahigh;
++
+ slvsel = mpc8xxx_spi_read_reg(&reg_base->slvsel);
+- slvsel = on ? (slvsel | (1 << cs)) : (slvsel & ~(1 << cs));
++ slvsel = setbit ? (slvsel | (1 << cs)) : (slvsel & ~(1 << cs));
+ mpc8xxx_spi_write_reg(&reg_base->slvsel, slvsel);
+ }
+ }
+--
+2.34.1
+
--- /dev/null
+From df597fceec1f0443b134c87b0b4bdd4843d66fb3 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Wed, 5 Apr 2017 15:42:18 +0200
+Subject: [PATCH 19/32] sparc32: leon: Align flush instructions for N2X
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/include/asm/cacheflush.h | 2 +-
+ arch/sparc/mm/leon_mm.c | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/arch/sparc/include/asm/cacheflush.h b/arch/sparc/include/asm/cacheflush.h
+index 881ac76eab93..a887f90c0006 100644
+--- a/arch/sparc/include/asm/cacheflush.h
++++ b/arch/sparc/include/asm/cacheflush.h
+@@ -3,7 +3,7 @@
+ #define ___ASM_SPARC_CACHEFLUSH_H
+
+ /* flush addr - to allow use of self-modifying code */
+-#define flushi(addr) __asm__ __volatile__ ("flush %0" : : "r" (addr) : "memory")
++#define flushi(addr) __asm__ __volatile__ (".align 32\nflush %0\n.align 32\n" : : "r" (addr) : "memory")
+
+ #if defined(__sparc__) && defined(__arch64__)
+ #include <asm/cacheflush_64.h>
+diff --git a/arch/sparc/mm/leon_mm.c b/arch/sparc/mm/leon_mm.c
+index d894a57a6ab3..f8ac99759ed3 100644
+--- a/arch/sparc/mm/leon_mm.c
++++ b/arch/sparc/mm/leon_mm.c
+@@ -183,7 +183,7 @@ unsigned long leon_swprobe(unsigned long vaddr, unsigned long *paddr)
+
+ void leon_flush_icache_all(void)
+ {
+- __asm__ __volatile__(" flush "); /*iflush*/
++ __asm__ __volatile__(".align 32\nflush\n.align 32\n"); /*iflush*/
+ }
+
+ void leon_flush_dcache_all(void)
+@@ -201,7 +201,7 @@ void leon_flush_pcache_all(struct vm_area_struct *vma, unsigned long page)
+
+ void leon_flush_cache_all(void)
+ {
+- __asm__ __volatile__(" flush "); /*iflush*/
++ __asm__ __volatile__(".align 32\nflush\n.align 32\n"); /*iflush*/
+ __asm__ __volatile__("sta %%g0, [%%g0] %0\n\t" : :
+ "i"(ASI_LEON_DFLUSH) : "memory");
+ }
+--
+2.34.1
+
--- /dev/null
+From 5394f961bbab7fe4b7576739eb59d1d5f417e669 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Mon, 21 Sep 2020 11:07:10 +0200
+Subject: [PATCH 20/32] sparc32,leon: Add configuration option for enabling
+ -mfix-ut700
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/Kconfig | 7 +++++++
+ arch/sparc/Makefile | 5 +++++
+ 2 files changed, 12 insertions(+)
+
+diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
+index b7aa802d3372..73de59d47d47 100644
+--- a/arch/sparc/Kconfig
++++ b/arch/sparc/Kconfig
+@@ -395,6 +395,13 @@ config SPARC_LEON
+ toolchain at www.gaisler.com.
+
+ if SPARC_LEON
++config SPARC_LEON_FIX_UT700
++ bool "UT700 errata fixes"
++ default n
++ help
++ If you say Y here, errata fixes will be included for UT700 errata.
++ If unsure, say N.
++
+ config SPARC_LEON_CAS
+ bool "Use Compare and Swap"
+ default y
+diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
+index 65cd5c7dc160..b63401ecbfbf 100644
+--- a/arch/sparc/Makefile
++++ b/arch/sparc/Makefile
+@@ -40,6 +40,11 @@ KBUILD_CFLAGS += -Wa,-Av8
+
+ KBUILD_AFLAGS += -m32 -mcpu=$(SPARC_MCPU) -Wa,-Av8
+
++ifeq ($(CONFIG_SPARC_LEON_FIX_UT700),y)
++KBUILD_CFLAGS += -mfix-ut700
++KBUILD_AFLAGS += -mfix-ut700
++endif
++
+ else
+ #####
+ # sparc64
+--
+2.34.1
+
--- /dev/null
+From 42a65418382690b7199ab23fbe72071da3f6a12d Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Thu, 22 Sep 2016 15:52:07 +0200
+Subject: [PATCH 21/32] sparc32: leon: Add fixes for leon3ft b2b store errata
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/include/asm/asmmacro.h | 12 ++++
+ arch/sparc/include/asm/checksum_32.h | 1 +
+ arch/sparc/include/asm/leon.h | 15 ++++-
+ arch/sparc/include/asm/obio.h | 6 +-
+ arch/sparc/include/asm/pgtsrmmu.h | 7 ++-
+ arch/sparc/include/asm/processor_32.h | 14 ++++-
+ arch/sparc/include/asm/psr.h | 3 +
+ arch/sparc/include/asm/sbi.h | 17 +++++-
+ arch/sparc/include/asm/spinlock_32.h | 10 +++-
+ arch/sparc/include/asm/uaccess_32.h | 5 +-
+ arch/sparc/include/asm/winmacro.h | 23 ++++++-
+ arch/sparc/include/asm/xor_32.h | 18 ++++++
+ arch/sparc/kernel/entry.S | 46 +++++++++++---
+ arch/sparc/kernel/etrap_32.S | 1 +
+ arch/sparc/kernel/head_32.S | 19 +++++-
+ arch/sparc/kernel/leon_smp.c | 10 +++-
+ arch/sparc/kernel/sun4d_smp.c | 8 ++-
+ arch/sparc/kernel/una_asm_32.S | 23 ++++---
+ arch/sparc/kernel/wof.S | 4 +-
+ arch/sparc/lib/blockops.S | 17 +++++-
+ arch/sparc/lib/checksum_32.S | 28 +++++----
+ arch/sparc/lib/copy_user.S | 7 ++-
+ arch/sparc/lib/locks.S | 9 +++
+ arch/sparc/lib/memcpy.S | 10 ++--
+ arch/sparc/lib/memset.S | 86 ++++++++++++++++++++++-----
+ arch/sparc/mm/hypersparc.S | 54 +++++++++++++----
+ arch/sparc/mm/leon_mm.c | 14 ++++-
+ arch/sparc/mm/srmmu.c | 5 +-
+ arch/sparc/mm/swift.S | 7 ++-
+ arch/sparc/mm/tsunami.S | 4 +-
+ arch/sparc/mm/viking.S | 43 ++++++++------
+ 31 files changed, 417 insertions(+), 109 deletions(-)
+
+diff --git a/arch/sparc/include/asm/asmmacro.h b/arch/sparc/include/asm/asmmacro.h
+index 49aaf6f3bc55..687269d581d1 100644
+--- a/arch/sparc/include/asm/asmmacro.h
++++ b/arch/sparc/include/asm/asmmacro.h
+@@ -43,4 +43,16 @@
+ __VA_ARGS__; \
+ .previous
+
++#ifdef __FIX_LEON3FT_B2BST
++#define B2B_SINGLE_NOP nop;
++#define B2B_DOUBLE_NOP nop; nop;
++#define B2B_INLINE_SINGLE_NOP "nop\n\t"
++#define B2B_INLINE_DOUBLE_NOP "nop\n\tnop\n\t"
++#else
++#define B2B_SINGLE_NOP
++#define B2B_DOUBLE_NOP
++#define B2B_INLINE_SINGLE_NOP ""
++#define B2B_INLINE_DOUBLE_NOP ""
++#endif
++
+ #endif /* !(_SPARC_ASMMACRO_H) */
+diff --git a/arch/sparc/include/asm/checksum_32.h b/arch/sparc/include/asm/checksum_32.h
+index ce11e0ad80c7..981a36b40754 100644
+--- a/arch/sparc/include/asm/checksum_32.h
++++ b/arch/sparc/include/asm/checksum_32.h
+@@ -18,6 +18,7 @@
+
+ #include <linux/in6.h>
+ #include <linux/uaccess.h>
++#include <asm/asmmacro.h>
+
+ /* computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h
+index c1e05e4ab9e3..143c06f8c7bc 100644
+--- a/arch/sparc/include/asm/leon.h
++++ b/arch/sparc/include/asm/leon.h
+@@ -61,10 +61,15 @@
+
+ #ifndef __ASSEMBLY__
+
++#include <asm/asmmacro.h>
++
+ /* do a physical address bypass write, i.e. for 0x80000000 */
+ static inline void leon_store_reg(unsigned long paddr, unsigned long value)
+ {
+- __asm__ __volatile__("sta %0, [%1] %2\n\t" : : "r"(value), "r"(paddr),
++ __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
++ "sta %0, [%1] %2\n\t"
++ B2B_INLINE_DOUBLE_NOP
++ : : "r"(value), "r"(paddr),
+ "i"(ASI_LEON_BYPASS) : "memory");
+ }
+
+@@ -102,7 +107,9 @@ static inline void sparc_leon3_enable_snooping(void)
+ __asm__ __volatile__ ("lda [%%g0] 2, %%l1\n\t"
+ "set 0x800000, %%l2\n\t"
+ "or %%l2, %%l1, %%l2\n\t"
+- "sta %%l2, [%%g0] 2\n\t" : : : "l1", "l2");
++ "sta %%l2, [%%g0] 2\n\t"
++ B2B_INLINE_DOUBLE_NOP
++ : : : "l1", "l2");
+ };
+
+ static inline int sparc_leon3_snooping_enabled(void)
+@@ -117,7 +124,9 @@ static inline void sparc_leon3_disable_cache(void)
+ __asm__ __volatile__ ("lda [%%g0] 2, %%l1\n\t"
+ "set 0x00000f, %%l2\n\t"
+ "andn %%l2, %%l1, %%l2\n\t"
+- "sta %%l2, [%%g0] 2\n\t" : : : "l1", "l2");
++ "sta %%l2, [%%g0] 2\n\t"
++ B2B_INLINE_DOUBLE_NOP
++ : : : "l1", "l2");
+ };
+
+ static inline unsigned long sparc_leon3_asr17(void)
+diff --git a/arch/sparc/include/asm/obio.h b/arch/sparc/include/asm/obio.h
+index 1b151f738b00..122a49968a04 100644
+--- a/arch/sparc/include/asm/obio.h
++++ b/arch/sparc/include/asm/obio.h
+@@ -112,7 +112,11 @@ static inline int bw_get_intr_mask(int sbus_level)
+
+ static inline void bw_clear_intr_mask(int sbus_level, int mask)
+ {
+- __asm__ __volatile__ ("stha %0, [%1] %2" : :
++ /* Not used for LEON. B2B-nops just to make scan script happy. */
++ __asm__ __volatile__ (B2B_INLINE_DOUBLE_NOP
++ "stha %0, [%1] %2\n\t"
++ B2B_INLINE_DOUBLE_NOP
++ : :
+ "r" (mask),
+ "r" (BW_LOCAL_BASE + BW_INTR_TABLE_CLEAR + (sbus_level << 3)),
+ "i" (ASI_M_CTL));
+diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h
+index 117009b03cf4..5c16cc8f0a38 100644
+--- a/arch/sparc/include/asm/pgtsrmmu.h
++++ b/arch/sparc/include/asm/pgtsrmmu.h
+@@ -106,6 +106,8 @@
+ restore %g0, %g0, %g0;
+
+ #ifndef __ASSEMBLY__
++#include <asm/asmmacro.h>
++
+ extern unsigned long last_valid_pfn;
+
+ /* This makes sense. Honest it does - Anton */
+@@ -127,7 +129,10 @@ unsigned int srmmu_get_faddr(void);
+ /* This is guaranteed on all SRMMU's. */
+ static inline void srmmu_flush_whole_tlb(void)
+ {
+- __asm__ __volatile__("sta %%g0, [%0] %1\n\t": :
++ __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
++ "sta %%g0, [%0] %1\n\t"
++ B2B_INLINE_DOUBLE_NOP
++ : :
+ "r" (0x400), /* Flush entire TLB!! */
+ "i" (ASI_M_FLUSH_PROBE) : "memory");
+
+diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
+index 3c4bc2189092..bf7c364caa29 100644
+--- a/arch/sparc/include/asm/processor_32.h
++++ b/arch/sparc/include/asm/processor_32.h
+@@ -12,6 +12,7 @@
+ #include <asm/head.h>
+ #include <asm/signal.h>
+ #include <asm/page.h>
++#include <asm/asmmacro.h>
+
+ /* Whee, this is STACK_TOP + PAGE_SIZE and the lowest kernel address too...
+ * That one page is used to protect kernel from intruders, so that
+@@ -73,15 +74,24 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc,
+ regs->npc = regs->pc + 4;
+ regs->y = 0;
+ zero = 0;
+- __asm__ __volatile__("std\t%%g0, [%0 + %3 + 0x00]\n\t"
++ __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
++ "std\t%%g0, [%0 + %3 + 0x00]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std\t%%g0, [%0 + %3 + 0x08]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std\t%%g0, [%0 + %3 + 0x10]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std\t%%g0, [%0 + %3 + 0x18]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std\t%%g0, [%0 + %3 + 0x20]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std\t%%g0, [%0 + %3 + 0x28]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std\t%%g0, [%0 + %3 + 0x30]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "st\t%1, [%0 + %3 + 0x38]\n\t"
+- "st\t%%g0, [%0 + %3 + 0x3c]"
++ "st\t%%g0, [%0 + %3 + 0x3c]\n\t"
++ B2B_INLINE_DOUBLE_NOP
+ : /* no outputs */
+ : "r" (regs),
+ "r" (sp - sizeof(struct reg_window32)),
+diff --git a/arch/sparc/include/asm/psr.h b/arch/sparc/include/asm/psr.h
+index 65127ce565ab..4ad45ccfe8d8 100644
+--- a/arch/sparc/include/asm/psr.h
++++ b/arch/sparc/include/asm/psr.h
+@@ -15,6 +15,8 @@
+
+
+ #ifndef __ASSEMBLY__
++#include <asm/asmmacro.h>
++
+ /* Get the %psr register. */
+ static inline unsigned int get_psr(void)
+ {
+@@ -55,6 +57,7 @@ static inline unsigned int get_fsr(void)
+ unsigned int fsr = 0;
+
+ __asm__ __volatile__(
++ B2B_INLINE_DOUBLE_NOP
+ "st %%fsr, %1\n\t"
+ "ld %1, %0\n\t"
+ : "=r" (fsr)
+diff --git a/arch/sparc/include/asm/sbi.h b/arch/sparc/include/asm/sbi.h
+index 4d6026c1e446..49b4e0aa4689 100644
+--- a/arch/sparc/include/asm/sbi.h
++++ b/arch/sparc/include/asm/sbi.h
+@@ -66,6 +66,8 @@ struct sbi_regs {
+
+ #ifndef __ASSEMBLY__
+
++#include <asm/asmmacro.h>
++
+ static inline int acquire_sbi(int devid, int mask)
+ {
+ __asm__ __volatile__ ("swapa [%2] %3, %0" :
+@@ -78,7 +80,10 @@ static inline int acquire_sbi(int devid, int mask)
+
+ static inline void release_sbi(int devid, int mask)
+ {
+- __asm__ __volatile__ ("sta %0, [%1] %2" : :
++ __asm__ __volatile__ (B2B_INLINE_DOUBLE_NOP
++ "sta %0, [%1] %2\n\t"
++ B2B_INLINE_DOUBLE_NOP
++ : :
+ "r" (mask),
+ "r" (ECSR_DEV_BASE(devid) | SBI_INTR_STATE),
+ "i" (ASI_M_CTL));
+@@ -86,7 +91,10 @@ static inline void release_sbi(int devid, int mask)
+
+ static inline void set_sbi_tid(int devid, int targetid)
+ {
+- __asm__ __volatile__ ("sta %0, [%1] %2" : :
++ __asm__ __volatile__ (B2B_INLINE_DOUBLE_NOP
++ "sta %0, [%1] %2\n\t"
++ B2B_INLINE_DOUBLE_NOP
++ : :
+ "r" (targetid),
+ "r" (ECSR_DEV_BASE(devid) | SBI_INTR_TID),
+ "i" (ASI_M_CTL));
+@@ -105,7 +113,10 @@ static inline int get_sbi_ctl(int devid, int cfgno)
+
+ static inline void set_sbi_ctl(int devid, int cfgno, int cfg)
+ {
+- __asm__ __volatile__ ("sta %0, [%1] %2" : :
++ __asm__ __volatile__ (B2B_INLINE_DOUBLE_NOP
++ "sta %0, [%1] %2\n\t"
++ B2B_INLINE_DOUBLE_NOP
++ : :
+ "r" (cfg),
+ "r" ((ECSR_DEV_BASE(devid) | SBI_CFG0) + (cfgno<<2)),
+ "i" (ASI_M_CTL));
+diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
+index bc5aa6f61676..adade4095cf2 100644
+--- a/arch/sparc/include/asm/spinlock_32.h
++++ b/arch/sparc/include/asm/spinlock_32.h
+@@ -12,6 +12,7 @@
+ #include <asm/psr.h>
+ #include <asm/barrier.h>
+ #include <asm/processor.h> /* for cpu_relax */
++#include <asm/asmmacro.h>
+
+ #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
+
+@@ -47,7 +48,10 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
+
+ static inline void arch_spin_unlock(arch_spinlock_t *lock)
+ {
+- __asm__ __volatile__("stb %%g0, [%0]" : : "r" (lock) : "memory");
++ __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
++ "stb %%g0, [%0]\n\t"
++ B2B_INLINE_DOUBLE_NOP
++ : : "r" (lock) : "memory");
+ }
+
+ /* Read-write spinlocks, allowing multiple readers
+@@ -133,7 +137,9 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
+ static inline void arch_write_unlock(arch_rwlock_t *lock)
+ {
+ __asm__ __volatile__(
+-" st %%g0, [%0]"
++" " B2B_INLINE_DOUBLE_NOP
++" st %%g0, [%0]\n"
++" " B2B_INLINE_DOUBLE_NOP
+ : /* no outputs */
+ : "r" (lock)
+ : "memory");
+diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
+index 0a2d3ebc4bb8..98432ac982ab 100644
+--- a/arch/sparc/include/asm/uaccess_32.h
++++ b/arch/sparc/include/asm/uaccess_32.h
+@@ -12,6 +12,7 @@
+ #include <linux/string.h>
+
+ #include <asm/processor.h>
++#include <asm/asmmacro.h>
+
+ #define ARCH_HAS_SORT_EXTABLE
+ #define ARCH_HAS_SEARCH_EXTABLE
+@@ -145,8 +146,10 @@ struct __large_struct { unsigned long buf[100]; };
+ #define __put_user_asm(x, size, addr, ret) \
+ __asm__ __volatile__( \
+ "/* Put user asm, inline. */\n" \
++ B2B_INLINE_DOUBLE_NOP \
+ "1:\t" "st"#size " %1, %2\n\t" \
+- "clr %0\n" \
++ "clr %0\n\t" \
++ B2B_INLINE_SINGLE_NOP \
+ "2:\n\n\t" \
+ ".section .fixup,#alloc,#execinstr\n\t" \
+ ".align 4\n" \
+diff --git a/arch/sparc/include/asm/winmacro.h b/arch/sparc/include/asm/winmacro.h
+index b6e911f5d93c..9c6208460a46 100644
+--- a/arch/sparc/include/asm/winmacro.h
++++ b/arch/sparc/include/asm/winmacro.h
+@@ -9,19 +9,28 @@
+ #define _SPARC_WINMACRO_H
+
+ #include <asm/ptrace.h>
++#include <asm/asmmacro.h>
+
+ /* Store the register window onto the 8-byte aligned area starting
+ * at %reg. It might be %sp, it might not, we don't care.
+ */
+ #define STORE_WINDOW(reg) \
+ std %l0, [%reg + RW_L0]; \
++ B2B_SINGLE_NOP \
+ std %l2, [%reg + RW_L2]; \
++ B2B_SINGLE_NOP \
+ std %l4, [%reg + RW_L4]; \
++ B2B_SINGLE_NOP \
+ std %l6, [%reg + RW_L6]; \
++ B2B_SINGLE_NOP \
+ std %i0, [%reg + RW_I0]; \
++ B2B_SINGLE_NOP \
+ std %i2, [%reg + RW_I2]; \
++ B2B_SINGLE_NOP \
+ std %i4, [%reg + RW_I4]; \
+- std %i6, [%reg + RW_I6];
++ B2B_SINGLE_NOP \
++ std %i6, [%reg + RW_I6]; \
++ B2B_SINGLE_NOP
+
+ /* Load a register window from the area beginning at %reg. */
+ #define LOAD_WINDOW(reg) \
+@@ -64,17 +73,25 @@
+
+ #define STORE_PT_INS(base_reg) \
+ std %i0, [%base_reg + STACKFRAME_SZ + PT_I0]; \
++ B2B_SINGLE_NOP \
+ std %i2, [%base_reg + STACKFRAME_SZ + PT_I2]; \
++ B2B_SINGLE_NOP \
+ std %i4, [%base_reg + STACKFRAME_SZ + PT_I4]; \
+- std %i6, [%base_reg + STACKFRAME_SZ + PT_I6];
++ B2B_SINGLE_NOP \
++ std %i6, [%base_reg + STACKFRAME_SZ + PT_I6]; \
++ B2B_SINGLE_NOP
+
+ #define STORE_PT_GLOBALS(base_reg) \
+ st %g1, [%base_reg + STACKFRAME_SZ + PT_G1]; \
+ std %g2, [%base_reg + STACKFRAME_SZ + PT_G2]; \
++ B2B_SINGLE_NOP \
+ std %g4, [%base_reg + STACKFRAME_SZ + PT_G4]; \
+- std %g6, [%base_reg + STACKFRAME_SZ + PT_G6];
++ B2B_SINGLE_NOP \
++ std %g6, [%base_reg + STACKFRAME_SZ + PT_G6]; \
++ B2B_SINGLE_NOP
+
+ #define STORE_PT_YREG(base_reg, scratch) \
++ B2B_SINGLE_NOP \
+ rd %y, %scratch; \
+ st %scratch, [%base_reg + STACKFRAME_SZ + PT_Y];
+
+diff --git a/arch/sparc/include/asm/xor_32.h b/arch/sparc/include/asm/xor_32.h
+index 3e5af37e4b9c..3c72d9644785 100644
+--- a/arch/sparc/include/asm/xor_32.h
++++ b/arch/sparc/include/asm/xor_32.h
+@@ -12,6 +12,8 @@
+ * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
++#include <asm/asmmacro.h>
++
+ static void
+ sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+ {
+@@ -36,9 +38,13 @@ sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+ "xor %%o2, %%l4, %%o2\n\t"
+ "xor %%o3, %%l5, %%o3\n\t"
+ "std %%g2, [%0 + 0x00]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std %%g4, [%0 + 0x08]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std %%o0, [%0 + 0x10]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std %%o2, [%0 + 0x18]\n"
++ B2B_INLINE_SINGLE_NOP
+ :
+ : "r" (p1), "r" (p2)
+ : "g2", "g3", "g4", "g5",
+@@ -86,9 +92,13 @@ sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ "xor %%o2, %%l4, %%o2\n\t"
+ "xor %%o3, %%l5, %%o3\n\t"
+ "std %%g2, [%0 + 0x00]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std %%g4, [%0 + 0x08]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std %%o0, [%0 + 0x10]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std %%o2, [%0 + 0x18]\n"
++ B2B_INLINE_SINGLE_NOP
+ :
+ : "r" (p1), "r" (p2), "r" (p3)
+ : "g2", "g3", "g4", "g5",
+@@ -149,9 +159,13 @@ sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ "xor %%o2, %%l4, %%o2\n\t"
+ "xor %%o3, %%l5, %%o3\n\t"
+ "std %%g2, [%0 + 0x00]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std %%g4, [%0 + 0x08]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std %%o0, [%0 + 0x10]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std %%o2, [%0 + 0x18]\n"
++ B2B_INLINE_SINGLE_NOP
+ :
+ : "r" (p1), "r" (p2), "r" (p3), "r" (p4)
+ : "g2", "g3", "g4", "g5",
+@@ -225,9 +239,13 @@ sparc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ "xor %%o2, %%l4, %%o2\n\t"
+ "xor %%o3, %%l5, %%o3\n\t"
+ "std %%g2, [%0 + 0x00]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std %%g4, [%0 + 0x08]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std %%o0, [%0 + 0x10]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std %%o2, [%0 + 0x18]\n"
++ B2B_INLINE_SINGLE_NOP
+ :
+ : "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
+ : "g2", "g3", "g4", "g5",
+diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
+index 87c68aeeb794..c0fdf1de10f2 100644
+--- a/arch/sparc/kernel/entry.S
++++ b/arch/sparc/kernel/entry.S
+@@ -121,6 +121,7 @@ floppy_tdone:
+ sethi %hi(pdma_vaddr), %l5
+ st %l4, [%l5 + %lo(pdma_vaddr)]
+ sethi %hi(pdma_size), %l5
++ B2B_SINGLE_NOP
+ st %l6, [%l5 + %lo(pdma_size)]
+ /* Flip terminal count pin */
+ set auxio_register, %l7
+@@ -138,11 +139,13 @@ floppy_tdone:
+ WRITE_PAUSE
+
+ stb %l5, [%l7]
++ B2B_SINGLE_NOP
+
+ /* Prevent recursion */
+ sethi %hi(doing_pdma), %l7
++ st %g0, [%l7 + %lo(doing_pdma)]
+ b floppy_dosoftint
+- st %g0, [%l7 + %lo(doing_pdma)]
++ nop
+
+ /* We emptied the FIFO, but we haven't read everything
+ * as of yet. Store the current transfer address and
+@@ -153,6 +156,7 @@ floppy_fifo_emptied:
+ sethi %hi(pdma_vaddr), %l5
+ st %l4, [%l5 + %lo(pdma_vaddr)]
+ sethi %hi(pdma_size), %l7
++ B2B_SINGLE_NOP
+ st %l6, [%l7 + %lo(pdma_size)]
+
+ /* Restore condition codes */
+@@ -165,10 +169,12 @@ floppy_fifo_emptied:
+ floppy_overrun:
+ sethi %hi(pdma_vaddr), %l5
+ st %l4, [%l5 + %lo(pdma_vaddr)]
++ B2B_SINGLE_NOP
+ sethi %hi(pdma_size), %l5
+ st %l6, [%l5 + %lo(pdma_size)]
+ /* Prevent recursion */
+ sethi %hi(doing_pdma), %l7
++ B2B_SINGLE_NOP
+ st %g0, [%l7 + %lo(doing_pdma)]
+
+ /* fall through... */
+@@ -323,8 +329,9 @@ linux_trap_ipi15_sun4m:
+ ld [%o5 + %o0], %o5
+ ld [%o5 + 0x00], %o3 ! sun4m_irq_percpu[cpu]->pending
+ andcc %o3, %o2, %g0
++ st %o2, [%o5 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x80000000
+ be sun4m_nmi_error ! Must be an NMI async memory error
+- st %o2, [%o5 + 0x04] ! sun4m_irq_percpu[cpu]->clear=0x80000000
++ nop
+ WRITE_PAUSE
+ ld [%o5 + 0x00], %g0 ! sun4m_irq_percpu[cpu]->pending
+ WRITE_PAUSE
+@@ -1024,8 +1031,9 @@ ret_sys_call:
+ ld [%sp + STACKFRAME_SZ + PT_NPC], %l1 /* pc = npc */
+ add %l1, 0x4, %l2 /* npc = npc+4 */
+ st %l1, [%sp + STACKFRAME_SZ + PT_PC]
++ st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
+ b ret_trap_entry
+- st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
++ nop
+ 1:
+ /* System call failure, set Carry condition code.
+ * Also, get abs(errno) to return to the process.
+@@ -1038,8 +1046,9 @@ ret_sys_call:
+ ld [%sp + STACKFRAME_SZ + PT_NPC], %l1 /* pc = npc */
+ add %l1, 0x4, %l2 /* npc = npc+4 */
+ st %l1, [%sp + STACKFRAME_SZ + PT_PC]
++ st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
+ b ret_trap_entry
+- st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
++ nop
+
+ linux_syscall_trace2:
+ add %sp, STACKFRAME_SZ, %o0
+@@ -1047,8 +1056,9 @@ linux_syscall_trace2:
+ call syscall_trace
+ add %l1, 0x4, %l2 /* npc = npc+4 */
+ st %l1, [%sp + STACKFRAME_SZ + PT_PC]
++ st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
+ b ret_trap_entry
+- st %l2, [%sp + STACKFRAME_SZ + PT_NPC]
++ nop
+
+
+ /* Saving and restoring the FPU state is best done from lowlevel code.
+@@ -1070,6 +1080,7 @@ fpsave:
+ /* We have an fpqueue to save. */
+ 1:
+ std %fq, [%o2]
++ B2B_SINGLE_NOP
+ fpsave_magic:
+ st %fsr, [%o1]
+ ld [%o1], %g3
+@@ -1086,22 +1097,39 @@ fpsave_magic:
+ st %g2, [%o3]
+
+ std %f0, [%o0 + 0x00]
++ B2B_SINGLE_NOP
+ std %f2, [%o0 + 0x08]
++ B2B_SINGLE_NOP
+ std %f4, [%o0 + 0x10]
++ B2B_SINGLE_NOP
+ std %f6, [%o0 + 0x18]
++ B2B_SINGLE_NOP
+ std %f8, [%o0 + 0x20]
++ B2B_SINGLE_NOP
+ std %f10, [%o0 + 0x28]
++ B2B_SINGLE_NOP
+ std %f12, [%o0 + 0x30]
++ B2B_SINGLE_NOP
+ std %f14, [%o0 + 0x38]
++ B2B_SINGLE_NOP
+ std %f16, [%o0 + 0x40]
++ B2B_SINGLE_NOP
+ std %f18, [%o0 + 0x48]
++ B2B_SINGLE_NOP
+ std %f20, [%o0 + 0x50]
++ B2B_SINGLE_NOP
+ std %f22, [%o0 + 0x58]
++ B2B_SINGLE_NOP
+ std %f24, [%o0 + 0x60]
++ B2B_SINGLE_NOP
+ std %f26, [%o0 + 0x68]
++ B2B_SINGLE_NOP
+ std %f28, [%o0 + 0x70]
++ B2B_SINGLE_NOP
++ std %f30, [%o0 + 0x78]
++ B2B_SINGLE_NOP
+ retl
+- std %f30, [%o0 + 0x78]
++ nop
+
+ /* Thanks for Theo Deraadt and the authors of the Sprite/netbsd/openbsd
+ * code for pointing out this possible deadlock, while we save state
+@@ -1109,8 +1137,9 @@ fpsave_magic:
+ * code has to know how to deal with this.
+ */
+ fpsave_catch:
++ st %fsr, [%o1]
+ b fpsave_magic + 4
+- st %fsr, [%o1]
++ nop
+
+ fpsave_catch2:
+ st %fsr, [%o1] /* In this case, this is the first successful fsr read */
+@@ -1267,8 +1296,9 @@ kuw_patch1:
+ wr %o5, 0x0, %psr ! re-enable interrupts
+ WRITE_PAUSE ! burn baby burn
+ 3:
++ st %g0, [%g6 + TI_W_SAVED] ! no windows saved
+ retl ! return
+- st %g0, [%g6 + TI_W_SAVED] ! no windows saved
++ nop
+
+ .align 4
+ .globl restore_current
+diff --git a/arch/sparc/kernel/etrap_32.S b/arch/sparc/kernel/etrap_32.S
+index 9f243f918619..860df075a355 100644
+--- a/arch/sparc/kernel/etrap_32.S
++++ b/arch/sparc/kernel/etrap_32.S
+@@ -253,6 +253,7 @@ trap_setup_user_stack_is_bolixed:
+ or %glob_tmp, 0x2, %glob_tmp ! or in no_fault bit
+ LEON_PI(sta %glob_tmp, [%g0] ASI_LEON_MMUREGS) ! set it
+ SUN_PI_(sta %glob_tmp, [%g0] ASI_M_MMUREGS) ! set it
++ B2B_DOUBLE_NOP
+
+ /* Dump the registers and cross fingers. */
+ STORE_WINDOW(sp)
+diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
+index be30c8d4cc73..cdff4d974434 100644
+--- a/arch/sparc/kernel/head_32.S
++++ b/arch/sparc/kernel/head_32.S
+@@ -26,6 +26,7 @@
+ #include <asm/errno.h>
+ #include <asm/pgtable.h> /* PGDIR_SHIFT */
+ #include <asm/export.h>
++#include <asm/asmmacro.h>
+
+ .data
+ /* The following are used with the prom_vector node-ops to figure out
+@@ -365,6 +366,7 @@ execute_in_high_mem:
+
+ sethi %hi(prom_vector_p), %g1
+ st %o0, [%g1 + %lo(prom_vector_p)]
++ B2B_SINGLE_NOP
+
+ sethi %hi(linux_dbvec), %g1
+ st %o1, [%g1 + %lo(linux_dbvec)]
+@@ -465,6 +467,7 @@ sun4d_init:
+ srl %g3, 3, %g4
+ sta %g4, [%g0] ASI_M_VIKING_TMP1
+ sethi %hi(boot_cpu_id), %g5
++ B2B_SINGLE_NOP
+ stb %g4, [%g5 + %lo(boot_cpu_id)]
+ #endif
+
+@@ -550,6 +553,7 @@ continue_boot:
+ #ifdef CONFIG_SMP
+ st %g6, [%g2]
+ add %g2, %g3, %g2
++ B2B_SINGLE_NOP
+ #endif
+ st %g6, [%g2]
+
+@@ -624,21 +628,27 @@ continue_boot:
+ set flush_patch_one, %g5
+ st %g4, [%g5 + 0x18]
+ st %g4, [%g5 + 0x1c]
++ B2B_SINGLE_NOP
+ set flush_patch_two, %g5
+ st %g4, [%g5 + 0x18]
+ st %g4, [%g5 + 0x1c]
++ B2B_SINGLE_NOP
+ set flush_patch_three, %g5
+ st %g4, [%g5 + 0x18]
+ st %g4, [%g5 + 0x1c]
++ B2B_SINGLE_NOP
+ set flush_patch_four, %g5
+ st %g4, [%g5 + 0x18]
+ st %g4, [%g5 + 0x1c]
++ B2B_SINGLE_NOP
+ set flush_patch_exception, %g5
+ st %g4, [%g5 + 0x18]
+ st %g4, [%g5 + 0x1c]
++ B2B_SINGLE_NOP
+ set flush_patch_switch, %g5
+ st %g4, [%g5 + 0x18]
+ st %g4, [%g5 + 0x1c]
++ B2B_SINGLE_NOP
+
+ 2:
+ sethi %hi(nwindows), %g4
+@@ -738,8 +748,9 @@ no_sun4u_here:
+ add %l4, 4, %l4
+ cmp %l5, %l2
+ add %l5, %l6, %l5
+- bgeu,a 3f
+- st %l5, [%l4 - 4]
++ blu 3f ! skip the store unless %l5 >= %l2 (same condition as bgeu,a)
++ nop
++ st %l5, [%l4 - 4] ! conditional store, now outside the delay slot
+ 3:
+ subcc %l3, 4, %l3
+ bne 2b
+@@ -750,13 +761,15 @@ no_sun4u_here:
+
+ ld [%l1 + (sun4u_r1 - sun4u_a1)], %o1
+ add %l1, (sun4u_a2 - sun4u_a1), %o0
++ st %o1, [%o0 + (sun4u_i2 - sun4u_a2)]
+ call %l0
+- st %o1, [%o0 + (sun4u_i2 - sun4u_a2)]
++ nop
+
+ ld [%l1 + (sun4u_1 - sun4u_a1)], %o1
+ add %l1, (sun4u_a3 - sun4u_a1), %o0
+- call %l0
+ st %o1, [%o0 + (sun4u_i3 - sun4u_a3)]
++ call %l0
++ nop
+
+ call %l0
+ add %l1, (sun4u_a4 - sun4u_a1), %o0
+diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
+index 1eed26d423fb..f726d950e347 100644
+--- a/arch/sparc/kernel/leon_smp.c
++++ b/arch/sparc/kernel/leon_smp.c
+@@ -44,6 +44,7 @@
+ #include <asm/leon.h>
+ #include <asm/leon_amba.h>
+ #include <asm/timer.h>
++#include <asm/asmmacro.h>
+
+ #include "kernel.h"
+
+@@ -391,9 +392,14 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
+ register unsigned long a4 asm("i4") = arg4;
+ register unsigned long a5 asm("i5") = 0;
+
+- __asm__ __volatile__("std %0, [%6]\n\t"
++ __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
++ "std %0, [%6]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std %2, [%6 + 8]\n\t"
+- "std %4, [%6 + 16]\n\t" : :
++ B2B_INLINE_SINGLE_NOP
++ "std %4, [%6 + 16]\n\t"
++ B2B_INLINE_SINGLE_NOP
++ : :
+ "r"(f), "r"(a1), "r"(a2), "r"(a3),
+ "r"(a4), "r"(a5),
+ "r"(&ccall_info.func));
+diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
+index ff30f03beb7c..b06eaf4d1256 100644
+--- a/arch/sparc/kernel/sun4d_smp.c
++++ b/arch/sparc/kernel/sun4d_smp.c
+@@ -21,6 +21,7 @@
+ #include <asm/oplib.h>
+ #include <asm/sbi.h>
+ #include <asm/mmu.h>
++#include <asm/asmmacro.h>
+
+ #include "kernel.h"
+ #include "irq.h"
+@@ -304,9 +305,14 @@ static void sun4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
+ register unsigned long a5 asm("i5") = 0;
+
+ __asm__ __volatile__(
++ B2B_INLINE_DOUBLE_NOP
+ "std %0, [%6]\n\t"
++ B2B_INLINE_SINGLE_NOP
+ "std %2, [%6 + 8]\n\t"
+- "std %4, [%6 + 16]\n\t" : :
++ B2B_INLINE_SINGLE_NOP
++ "std %4, [%6 + 16]\n\t"
++ B2B_INLINE_SINGLE_NOP
++ : :
+ "r"(f), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5),
+ "r" (&ccall_info.func));
+ }
+diff --git a/arch/sparc/kernel/una_asm_32.S b/arch/sparc/kernel/una_asm_32.S
+index f8bf839289fb..177011bebe3e 100644
+--- a/arch/sparc/kernel/una_asm_32.S
++++ b/arch/sparc/kernel/una_asm_32.S
+@@ -6,6 +6,7 @@
+ */
+
+ #include <linux/errno.h>
++#include <asm/asmmacro.h>
+
+ .text
+
+@@ -32,26 +33,30 @@ __do_int_store:
+ srl %g1, 24, %g2
+ srl %g1, 16, %g7
+ 4: stb %g2, [%o0]
+- srl %g1, 8, %g2
+ 5: stb %g7, [%o0 + 1]
++ srl %g1, 8, %g2
+ ld [%o2 + 4], %g7
+ 6: stb %g2, [%o0 + 2]
+- srl %g7, 24, %g2
+ 7: stb %g1, [%o0 + 3]
++ srl %g7, 24, %g2
+ srl %g7, 16, %g1
+ 8: stb %g2, [%o0 + 4]
+ srl %g7, 8, %g2
++ B2B_SINGLE_NOP
+ 9: stb %g1, [%o0 + 5]
+ 10: stb %g2, [%o0 + 6]
++11: stb %g7, [%o0 + 7]
+ b 0f
+-11: stb %g7, [%o0 + 7]
+-1: srl %g1, 16, %g7
++ nop
++1:
+ 12: stb %g2, [%o0]
++ srl %g1, 16, %g7
+ srl %g1, 8, %g2
+ 13: stb %g7, [%o0 + 1]
+ 14: stb %g2, [%o0 + 2]
++15: stb %g1, [%o0 + 3]
+ b 0f
+-15: stb %g1, [%o0 + 3]
++ nop
+ 2: srl %g1, 8, %g2
+ 16: stb %g2, [%o0]
+ 17: stb %g1, [%o0 + 1]
+@@ -99,8 +104,9 @@ do_int_load:
+ or %g1, %g2, %g1
+ sll %g1, 16, %g1
+ sra %g1, 16, %g1
+-3: b 0f
+- st %g1, [%o0]
++3: st %g1, [%o0]
++ b 0f
++ nop
+ 6: ldub [%o2 + 1], %g2
+ sll %g1, 24, %g1
+ 7: ldub [%o2 + 2], %g7
+@@ -110,8 +116,9 @@ do_int_load:
+ or %g3, %g2, %g3
+ or %g7, %g3, %g7
+ or %g1, %g7, %g1
++ st %g1, [%o0]
+ b 0f
+- st %g1, [%o0]
++ nop
+ 9: ldub [%o2], %g1
+ 10: ldub [%o2 + 1], %g2
+ sll %g1, 24, %g1
+diff --git a/arch/sparc/kernel/wof.S b/arch/sparc/kernel/wof.S
+index 96a3a112423a..8538818424c0 100644
+--- a/arch/sparc/kernel/wof.S
++++ b/arch/sparc/kernel/wof.S
+@@ -124,6 +124,8 @@ spwin_no_userwins_from_kernel:
+ jmp %t_pc ! Return from trap
+ rett %t_npc ! we are done
+
++ B2B_SINGLE_NOP ! To not trigger delay slot warning
++
+ spwin_exist_uwins:
+ /* LOCATION: Trap window */
+
+@@ -341,7 +343,7 @@ SUN_PI_(lda [%g0] ASI_M_MMUREGS, %glob_tmp) ! read MMU control
+ or %glob_tmp, 0x2, %glob_tmp ! or in no_fault bit
+ LEON_PI(sta %glob_tmp, [%g0] ASI_LEON_MMUREGS) ! set it
+ SUN_PI_(sta %glob_tmp, [%g0] ASI_M_MMUREGS) ! set it
+-
++ B2B_DOUBLE_NOP
+ /* Dump the registers and cross fingers. */
+ STORE_WINDOW(sp)
+
+diff --git a/arch/sparc/lib/blockops.S b/arch/sparc/lib/blockops.S
+index 76ddd1ff6833..9f66d08ff8a3 100644
+--- a/arch/sparc/lib/blockops.S
++++ b/arch/sparc/lib/blockops.S
+@@ -8,19 +8,28 @@
+ #include <linux/linkage.h>
+ #include <asm/page.h>
+ #include <asm/export.h>
++#include <asm/asmmacro.h>
+
+ /* Zero out 64 bytes of memory at (buf + offset).
+ * Assumes %g1 contains zero.
+ */
+ #define BLAST_BLOCK(buf, offset) \
+ std %g0, [buf + offset + 0x38]; \
++ B2B_SINGLE_NOP \
+ std %g0, [buf + offset + 0x30]; \
++ B2B_SINGLE_NOP \
+ std %g0, [buf + offset + 0x28]; \
++ B2B_SINGLE_NOP \
+ std %g0, [buf + offset + 0x20]; \
++ B2B_SINGLE_NOP \
+ std %g0, [buf + offset + 0x18]; \
++ B2B_SINGLE_NOP \
+ std %g0, [buf + offset + 0x10]; \
++ B2B_SINGLE_NOP \
+ std %g0, [buf + offset + 0x08]; \
+- std %g0, [buf + offset + 0x00];
++ B2B_SINGLE_NOP \
++ std %g0, [buf + offset + 0x00]; \
++ B2B_SINGLE_NOP
+
+ /* Copy 32 bytes of memory at (src + offset) to
+ * (dst + offset).
+@@ -31,9 +40,13 @@
+ ldd [src + offset + 0x08], t4; \
+ ldd [src + offset + 0x00], t6; \
+ std t0, [dst + offset + 0x18]; \
++ B2B_SINGLE_NOP \
+ std t2, [dst + offset + 0x10]; \
++ B2B_SINGLE_NOP \
+ std t4, [dst + offset + 0x08]; \
+- std t6, [dst + offset + 0x00];
++ B2B_SINGLE_NOP \
++ std t6, [dst + offset + 0x00]; \
++ B2B_SINGLE_NOP
+
+ /* Profiling evidence indicates that memset() is
+ * commonly called for blocks of size PAGE_SIZE,
+diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S
+index 7488d130faf7..1f5b2daf2d51 100644
+--- a/arch/sparc/lib/checksum_32.S
++++ b/arch/sparc/lib/checksum_32.S
+@@ -190,39 +190,47 @@ cpout: retl ! get outta here
+ * because of this we thus do all the ldd's together to get
+ * Viking MXCC into streaming mode. Ho hum...
+ */
++ /* B2B-FIX-NOTE: The fixup section is affected only by number of
++ * instructions and where the load instructions are located in this
++ * macro. Neither of those factors have been changed.
++ */
+ #define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \
+ ldd [src + off + 0x00], t0; \
+ ldd [src + off + 0x08], t2; \
+ ldd [src + off + 0x10], t4; \
+ ldd [src + off + 0x18], t6; \
+ st t0, [dst + off + 0x00]; \
+- addxcc t0, sum, sum; \
+ st t1, [dst + off + 0x04]; \
++ addxcc t0, sum, sum; \
+ addxcc t1, sum, sum; \
+ st t2, [dst + off + 0x08]; \
+- addxcc t2, sum, sum; \
+ st t3, [dst + off + 0x0c]; \
++ addxcc t2, sum, sum; \
+ addxcc t3, sum, sum; \
+ st t4, [dst + off + 0x10]; \
+- addxcc t4, sum, sum; \
+ st t5, [dst + off + 0x14]; \
++ addxcc t4, sum, sum; \
+ addxcc t5, sum, sum; \
+ st t6, [dst + off + 0x18]; \
+- addxcc t6, sum, sum; \
+ st t7, [dst + off + 0x1c]; \
++ addxcc t6, sum, sum; \
+ addxcc t7, sum, sum;
+
+ /* Yuck, 6 superscalar cycles... */
++ /* B2B-FIX-NOTE: The fixup section is affected only by number of
++ * instructions and where the load instructions are located in this
++ * macro. Neither of those factors have been changed.
++ */
+ #define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3) \
+ ldd [src - off - 0x08], t0; \
+ ldd [src - off - 0x00], t2; \
+ addxcc t0, sum, sum; \
+- st t0, [dst - off - 0x08]; \
+ addxcc t1, sum, sum; \
++ st t0, [dst - off - 0x08]; \
+ st t1, [dst - off - 0x04]; \
+ addxcc t2, sum, sum; \
+- st t2, [dst - off - 0x00]; \
+ addxcc t3, sum, sum; \
++ st t2, [dst - off - 0x00]; \
+ st t3, [dst - off + 0x04];
+
+ /* Handle the end cruft code out of band for better cache patterns. */
+@@ -399,8 +407,8 @@ ccslow: cmp %g1, 0
+ sub %g1, 2, %g1
+ srl %o4, 8, %g2
+ sub %g4, 1, %g4
+- EX(stb %g2, [%o1])
+ add %o4, %g5, %g5
++ EX(stb %g2, [%o1])
+ EX(stb %o4, [%o1 + 1])
+ add %o0, 2, %o0
+ srl %g4, 1, %g4
+@@ -413,10 +421,10 @@ ccslow: cmp %g1, 0
+ srl %o4, 16, %g3
+ EX(stb %g2, [%o1])
+ srl %o4, 8, %g2
+- EX(stb %g3, [%o1 + 1])
+ add %o0, 4, %o0
+- EX(stb %g2, [%o1 + 2])
+ addcc %o4, %g5, %g5
++ EX(stb %g3, [%o1 + 1])
++ EX(stb %g2, [%o1 + 2])
+ EX(stb %o4, [%o1 + 3])
+ addx %g5, %g0, %g5 ! I am now to lazy to optimize this (question it
+ add %o1, 4, %o1 ! is worthy). Maybe some day - with the sll/srl
+@@ -435,8 +443,8 @@ ccslow: cmp %g1, 0
+ srl %o4, 8, %g2
+ add %o0, 2, %o0
+ EX(stb %g2, [%o1])
+- add %g5, %o4, %g5
+ EX(stb %o4, [%o1 + 1])
++ add %g5, %o4, %g5
+ add %o1, 2, %o1
+ 3: be,a 1f
+ sll %g5, 16, %o4
+diff --git a/arch/sparc/lib/copy_user.S b/arch/sparc/lib/copy_user.S
+index dc72f2b970b7..b7cd5165497d 100644
+--- a/arch/sparc/lib/copy_user.S
++++ b/arch/sparc/lib/copy_user.S
+@@ -17,6 +17,7 @@
+ #include <asm/page.h>
+ #include <asm/thread_info.h>
+ #include <asm/export.h>
++#include <asm/asmmacro.h>
+
+ /* Work around cpp -rob */
+ #define ALLOC #alloc
+@@ -82,12 +83,12 @@
+
+ #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
+ ldd [%src + (offset) + 0x00], %t0; \
+- ldd [%src + (offset) + 0x08], %t2; \
+- ldd [%src + (offset) + 0x10], %t4; \
+- ldd [%src + (offset) + 0x18], %t6; \
+ std %t0, [%dst + (offset) + 0x00]; \
++ ldd [%src + (offset) + 0x08], %t2; \
+ std %t2, [%dst + (offset) + 0x08]; \
++ ldd [%src + (offset) + 0x10], %t4; \
+ std %t4, [%dst + (offset) + 0x10]; \
++ ldd [%src + (offset) + 0x18], %t6; \
+ std %t6, [%dst + (offset) + 0x18];
+
+ #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
+diff --git a/arch/sparc/lib/locks.S b/arch/sparc/lib/locks.S
+index 9a1289a3fb28..066717755d67 100644
+--- a/arch/sparc/lib/locks.S
++++ b/arch/sparc/lib/locks.S
+@@ -92,7 +92,16 @@ ___rw_write_enter:
+ bne ___rw_write_enter_spin_on_wlock
+ ld [%g1], %g2
+ andncc %g2, 0xff, %g0
++#ifdef __FIX_LEON3FT_B2BST
++ be 1f
++ nop
++ stb %g0, [%g1 + 3]
++ b ___rw_write_enter_spin_on_wlock
++ nop
++1:
++#else
+ bne,a ___rw_write_enter_spin_on_wlock
+ stb %g0, [%g1 + 3]
++#endif
+ retl
+ mov %g4, %o7
+diff --git a/arch/sparc/lib/memcpy.S b/arch/sparc/lib/memcpy.S
+index ee823d8c9215..dac6d6f0fe3c 100644
+--- a/arch/sparc/lib/memcpy.S
++++ b/arch/sparc/lib/memcpy.S
+@@ -32,12 +32,12 @@ x:
+
+ #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
+ ldd [%src + (offset) + 0x00], %t0; \
+- ldd [%src + (offset) + 0x08], %t2; \
+- ldd [%src + (offset) + 0x10], %t4; \
+- ldd [%src + (offset) + 0x18], %t6; \
+ std %t0, [%dst + (offset) + 0x00]; \
++ ldd [%src + (offset) + 0x08], %t2; \
+ std %t2, [%dst + (offset) + 0x08]; \
++ ldd [%src + (offset) + 0x10], %t4; \
+ std %t4, [%dst + (offset) + 0x10]; \
++ ldd [%src + (offset) + 0x18], %t6; \
+ std %t6, [%dst + (offset) + 0x18];
+
+ #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
+@@ -50,8 +50,8 @@ x:
+
+ #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
+ ldd [%src - (offset) - 0x10], %t0; \
+- ldd [%src - (offset) - 0x08], %t2; \
+ std %t0, [%dst - (offset) - 0x10]; \
++ ldd [%src - (offset) - 0x08], %t2; \
+ std %t2, [%dst - (offset) - 0x08];
+
+ #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
+@@ -192,8 +192,8 @@ EXPORT_SYMBOL(memcpy)
+
+ ldd [%o1], %g2
+ add %o0, 8, %o0
+- st %g2, [%o0 - 0x08]
+ add %o1, 8, %o1
++ st %g2, [%o0 - 0x08]
+ st %g3, [%o0 - 0x04]
+
+ 81: /* memcpy_last7 */
+diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S
+index f427f34b8b79..77ea205b8d66 100644
+--- a/arch/sparc/lib/memset.S
++++ b/arch/sparc/lib/memset.S
+@@ -11,6 +11,7 @@
+
+ #include <asm/ptrace.h>
+ #include <asm/export.h>
++#include <asm/asmmacro.h>
+
+ /* Work around cpp -rob */
+ #define ALLOC #alloc
+@@ -39,23 +40,39 @@
+ * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */
+ #define ZERO_BIG_BLOCK(base, offset, source) \
+ std source, [base + offset + 0x00]; \
++ B2B_SINGLE_NOP \
+ std source, [base + offset + 0x08]; \
++ B2B_SINGLE_NOP \
+ std source, [base + offset + 0x10]; \
++ B2B_SINGLE_NOP \
+ std source, [base + offset + 0x18]; \
++ B2B_SINGLE_NOP \
+ std source, [base + offset + 0x20]; \
++ B2B_SINGLE_NOP \
+ std source, [base + offset + 0x28]; \
++ B2B_SINGLE_NOP \
+ std source, [base + offset + 0x30]; \
+- std source, [base + offset + 0x38];
++ B2B_SINGLE_NOP \
++ std source, [base + offset + 0x38]; \
++ B2B_SINGLE_NOP
+
+ #define ZERO_LAST_BLOCKS(base, offset, source) \
+ std source, [base - offset - 0x38]; \
++ B2B_SINGLE_NOP \
+ std source, [base - offset - 0x30]; \
++ B2B_SINGLE_NOP \
+ std source, [base - offset - 0x28]; \
++ B2B_SINGLE_NOP \
+ std source, [base - offset - 0x20]; \
++ B2B_SINGLE_NOP \
+ std source, [base - offset - 0x18]; \
++ B2B_SINGLE_NOP \
+ std source, [base - offset - 0x10]; \
++ B2B_SINGLE_NOP \
+ std source, [base - offset - 0x08]; \
+- std source, [base - offset - 0x00];
++ B2B_SINGLE_NOP \
++ std source, [base - offset - 0x00]; \
++ B2B_SINGLE_NOP
+
+ .text
+ .align 4
+@@ -82,12 +99,14 @@ memset:
+ mov %o2, %o1
+ 3:
+ cmp %o2, 3
++ EX(stb %g3, [%o0], sub %o1, 0)
+ be 2f
+- EX(stb %g3, [%o0], sub %o1, 0)
++ nop
+
+ cmp %o2, 2
++ EX(stb %g3, [%o0 + 0x01], sub %o1, 1)
+ be 2f
+- EX(stb %g3, [%o0 + 0x01], sub %o1, 1)
++ nop
+
+ EX(stb %g3, [%o0 + 0x02], sub %o1, 2)
+ 2:
+@@ -132,7 +151,11 @@ __bzero:
+ be 13f
+ andcc %o1, 7, %o1
+
+- srl %o2, 1, %o3
++#ifdef __FIX_LEON3FT_B2BST
++ mov %o2, %o3 /* 8 bytes of std+nop sets 8 bytes of memory */
++#else
++ srl %o2, 1, %o3 /* 4 bytes of std sets 8 bytes of memory */
++#endif
+ set 13f, %o4
+ sub %o4, %o3, %o4
+ jmp %o4
+@@ -158,8 +181,9 @@ __bzero:
+ EX(sth %g3, [%o0], and %o1, 3)
+ add %o0, 2, %o0
+ 1:
+- bne,a 8f
+- EX(stb %g3, [%o0], and %o1, 1)
++ be 8f
++ nop
++ EX(stb %g3, [%o0], and %o1, 1)
+ 8:
+ b 0f
+ nop
+@@ -171,8 +195,9 @@ __bzero:
+ 8:
+ add %o0, 1, %o0
+ subcc %o1, 1, %o1
++ EX(stb %g3, [%o0 - 1], add %o1, 1)
+ bne 8b
+- EX(stb %g3, [%o0 - 1], add %o1, 1)
++ nop
+ 0:
+ andcc %g4, 1, %g0
+ be 5f
+@@ -180,23 +205,56 @@ __bzero:
+ retl
+ mov %g1, %o0
+ 5:
++ clr %o0
+ retl
+- clr %o0
++ nop
+ __memset_end:
+
+ .section .fixup,#alloc,#execinstr
+ .align 4
+ 20:
++ /*
++ * We got a fault in the 10: to 11: address range.
++ *
++ * At this point:
++ * - %g2 now contains the index (within the range) of the instruction that
++ * got the fault.
++ * - %o1 contains the number of bytes that were left to set/zero before
++ * entering the loop the first time.
++ * - %l3 contains the number of bytes left for the loop to set/zero
++ * (but adjusted in the middle of the loop)
++ *
++ */
++#ifdef __FIX_LEON3FT_B2BST
++ cmp %g2, 16 /* Double number of instructions per half */
++#else
+ cmp %g2, 8
++#endif
+ bleu 1f
+ and %o1, 0x7f, %o1
+- sub %g2, 9, %g2
+- add %o3, 64, %o3
++ /* We were in second half of the 10: to 11: block */
++#ifdef __FIX_LEON3FT_B2BST
++ sub %g2, 17, %g2 /* Adjust index: 8 std + nop pairs + one subcc */
++#else
++ sub %g2, 9, %g2 /* Adjust index to start of ZERO_BIG_BLOCK */
++#endif
++ add %o3, 64, %o3 /* Adjust bytes left in turn of the loop */
++ /* (due to the subcc being in the middle ) */
+ 1:
+- sll %g2, 3, %g2
+- add %o3, %o1, %o0
++ /*
++ * Convert index of faulting instruction within ZERO_BIG_BLOCK to
++ * number of bytes written
++ */
++#ifdef __FIX_LEON3FT_B2BST
++ sll %g2, 2, %g2 /* 8 bytes is written per 2 instructions (std+nop) */
++#else
++ sll %g2, 3, %g2 /* 8 bytes is written per std instruction */
++#endif
++ add %o3, %o1, %o0 /* Bytes left before faulting ZERO_BIG_BLOCK */
+ b 30f
+- sub %o0, %g2, %o0
++ sub %o0, %g2, %o0 /* Subtract bytes written by the faulting */
++ /* ZERO_BIG_BLOCK => the number of bytes */
++ /* that were not set/zeroed. */
+ 21:
+ mov 8, %o0
+ and %o1, 7, %o1
+diff --git a/arch/sparc/mm/hypersparc.S b/arch/sparc/mm/hypersparc.S
+index 6c2521e85a42..513ea55441b7 100644
+--- a/arch/sparc/mm/hypersparc.S
++++ b/arch/sparc/mm/hypersparc.S
+@@ -13,6 +13,7 @@
+ #include <asm/pgtable.h>
+ #include <asm/pgtsrmmu.h>
+ #include <linux/init.h>
++#include <asm/asmmacro.h>
+
+ .text
+ .align 4
+@@ -32,10 +33,12 @@ hypersparc_flush_cache_all:
+ ld [%g1 + %lo(vac_line_size)], %g2
+ 1:
+ subcc %g5, %g2, %g5 ! hyper_flush_unconditional_combined
++ sta %g0, [%g5] ASI_M_FLUSH_CTX
+ bne 1b
+- sta %g0, [%g5] ASI_M_FLUSH_CTX
++ nop
++ sta %g0, [%g0] ASI_M_FLUSH_IWHOLE ! hyper_flush_whole_icache
+ retl
+- sta %g0, [%g0] ASI_M_FLUSH_IWHOLE ! hyper_flush_whole_icache
++ nop
+
+ /* We expand the window flush to get maximum performance. */
+ hypersparc_flush_cache_mm:
+@@ -68,8 +71,9 @@ hypersparc_flush_cache_mm:
+ sta %g0, [%o0 + %g3] ASI_M_FLUSH_USER
+ sta %g0, [%o0 + %g4] ASI_M_FLUSH_USER
+ sta %g0, [%o0 + %g5] ASI_M_FLUSH_USER
++ sta %g0, [%o0 + %o4] ASI_M_FLUSH_USER
+ bne 1b
+- sta %g0, [%o0 + %o4] ASI_M_FLUSH_USER
++ nop
+ hypersparc_flush_cache_mm_out:
+ retl
+ nop
+@@ -117,8 +121,9 @@ hypersparc_flush_cache_range:
+ sta %g0, [%o3 + %g2] ASI_M_FLUSH_USER
+ sta %g0, [%o3 + %g3] ASI_M_FLUSH_USER
+ sta %g0, [%o3 + %g4] ASI_M_FLUSH_USER
++ sta %g0, [%o3 + %g5] ASI_M_FLUSH_USER
+ bne 1b
+- sta %g0, [%o3 + %g5] ASI_M_FLUSH_USER
++ nop
+ retl
+ nop
+
+@@ -145,9 +150,11 @@ hypersparc_flush_cache_range:
+ sta %g0, [%o2 + %g2] ASI_M_FLUSH_PAGE
+ sta %g0, [%o2 + %g3] ASI_M_FLUSH_PAGE
+ andcc %o2, 0xffc, %g0
++ B2B_SINGLE_NOP
+ sta %g0, [%o2 + %g4] ASI_M_FLUSH_PAGE
++ sta %g0, [%o2 + %g5] ASI_M_FLUSH_PAGE
+ bne 2b
+- sta %g0, [%o2 + %g5] ASI_M_FLUSH_PAGE
++ nop
+ 3:
+ cmp %o2, %o1
+ bne 1b
+@@ -202,9 +209,11 @@ hypersparc_flush_cache_page:
+ sta %g0, [%o1 + %g2] ASI_M_FLUSH_PAGE
+ sta %g0, [%o1 + %g3] ASI_M_FLUSH_PAGE
+ andcc %o1, 0xffc, %g0
++ B2B_SINGLE_NOP
+ sta %g0, [%o1 + %g4] ASI_M_FLUSH_PAGE
++ sta %g0, [%o1 + %g5] ASI_M_FLUSH_PAGE
+ bne 1b
+- sta %g0, [%o1 + %g5] ASI_M_FLUSH_PAGE
++ nop
+ 2:
+ mov SRMMU_FAULT_STATUS, %g7
+ mov SRMMU_CTX_REG, %g4
+@@ -247,9 +256,11 @@ hypersparc_flush_page_to_ram:
+ sta %g0, [%o0 + %g2] ASI_M_FLUSH_PAGE
+ sta %g0, [%o0 + %g3] ASI_M_FLUSH_PAGE
+ andcc %o0, 0xffc, %g0
++ B2B_SINGLE_NOP
+ sta %g0, [%o0 + %g4] ASI_M_FLUSH_PAGE
++ sta %g0, [%o0 + %g5] ASI_M_FLUSH_PAGE
+ bne 1b
+- sta %g0, [%o0 + %g5] ASI_M_FLUSH_PAGE
++ nop
+ 2:
+ mov SRMMU_FAULT_STATUS, %g1
+ retl
+@@ -282,8 +293,9 @@ hypersparc_flush_tlb_mm:
+ sta %o1, [%g1] ASI_M_MMUREGS
+ sta %g0, [%g2] ASI_M_FLUSH_PROBE
+ hypersparc_flush_tlb_mm_out:
++ sta %g5, [%g1] ASI_M_MMUREGS
+ retl
+- sta %g5, [%g1] ASI_M_MMUREGS
++ nop
+
+ hypersparc_flush_tlb_range:
+ ld [%o0 + VMA_VM_MM], %o0
+@@ -298,15 +310,16 @@ hypersparc_flush_tlb_range:
+ sta %o3, [%g1] ASI_M_MMUREGS
+ and %o1, %o4, %o1
+ add %o1, 0x200, %o1
+- sta %g0, [%o1] ASI_M_FLUSH_PROBE
+ 1:
++ sta %g0, [%o1] ASI_M_FLUSH_PROBE
+ sub %o1, %o4, %o1
+ cmp %o1, %o2
+- blu,a 1b
+- sta %g0, [%o1] ASI_M_FLUSH_PROBE
++ blu 1b
++ nop
+ hypersparc_flush_tlb_range_out:
++ sta %g5, [%g1] ASI_M_MMUREGS
+ retl
+- sta %g5, [%g1] ASI_M_MMUREGS
++ nop
+
+ hypersparc_flush_tlb_page:
+ ld [%o0 + VMA_VM_MM], %o0
+@@ -321,8 +334,9 @@ hypersparc_flush_tlb_page:
+ sta %o3, [%g1] ASI_M_MMUREGS
+ sta %g0, [%o1] ASI_M_FLUSH_PROBE
+ hypersparc_flush_tlb_page_out:
++ sta %g5, [%g1] ASI_M_MMUREGS
+ retl
+- sta %g5, [%g1] ASI_M_MMUREGS
++ nop
+
+ __INIT
+
+@@ -340,12 +354,19 @@ hypersparc_bzero_1page:
+ mov 16, %o1
+ 1:
+ stda %g0, [%o0 + %g0] ASI_M_BFILL
++ B2B_SINGLE_NOP
+ stda %g0, [%o0 + %g2] ASI_M_BFILL
++ B2B_SINGLE_NOP
+ stda %g0, [%o0 + %g3] ASI_M_BFILL
++ B2B_SINGLE_NOP
+ stda %g0, [%o0 + %g4] ASI_M_BFILL
++ B2B_SINGLE_NOP
+ stda %g0, [%o0 + %g5] ASI_M_BFILL
++ B2B_SINGLE_NOP
+ stda %g0, [%o0 + %g7] ASI_M_BFILL
++ B2B_SINGLE_NOP
+ stda %g0, [%o0 + %o2] ASI_M_BFILL
++ B2B_SINGLE_NOP
+ stda %g0, [%o0 + %o3] ASI_M_BFILL
+ subcc %o1, 1, %o1
+ bne 1b
+@@ -361,17 +382,24 @@ hypersparc_copy_1page:
+ 1:
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
+ add %o0, 32, %o0
++ B2B_SINGLE_NOP
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
+ add %o0, 32, %o0
++ B2B_SINGLE_NOP
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
+ add %o0, 32, %o0
++ B2B_SINGLE_NOP
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
+ add %o0, 32, %o0
++ B2B_SINGLE_NOP
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
+ add %o0, 32, %o0
++ B2B_SINGLE_NOP
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
++ B2B_SINGLE_NOP
+ add %o0, 32, %o0
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
++ B2B_SINGLE_NOP
+ add %o0, 32, %o0
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
+ subcc %g1, 1, %g1
+diff --git a/arch/sparc/mm/leon_mm.c b/arch/sparc/mm/leon_mm.c
+index f8ac99759ed3..c2e0b2905035 100644
+--- a/arch/sparc/mm/leon_mm.c
++++ b/arch/sparc/mm/leon_mm.c
+@@ -16,6 +16,7 @@
+ #include <asm/leon.h>
+ #include <asm/tlbflush.h>
+ #include <asm/pgtsrmmu.h>
++#include <asm/asmmacro.h>
+
+ #include "mm_32.h"
+
+@@ -188,7 +189,10 @@ void leon_flush_icache_all(void)
+
+ void leon_flush_dcache_all(void)
+ {
+- __asm__ __volatile__("sta %%g0, [%%g0] %0\n\t" : :
++ __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
++ "sta %%g0, [%%g0] %0\n\t"
++ B2B_INLINE_DOUBLE_NOP
++ : :
+ "i"(ASI_LEON_DFLUSH) : "memory");
+ }
+
+@@ -201,15 +205,21 @@ void leon_flush_pcache_all(struct vm_area_struct *vma, unsigned long page)
+
+ void leon_flush_cache_all(void)
+ {
++ __asm__ __volatile__(B2B_INLINE_SINGLE_NOP);
+ __asm__ __volatile__(".align 32\nflush\n.align 32\n"); /*iflush*/
+ __asm__ __volatile__("sta %%g0, [%%g0] %0\n\t" : :
+ "i"(ASI_LEON_DFLUSH) : "memory");
++ __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP);
++
+ }
+
+ void leon_flush_tlb_all(void)
+ {
+ leon_flush_cache_all();
+- __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : "r"(0x400),
++ __asm__ __volatile__(B2B_INLINE_DOUBLE_NOP
++ "sta %%g0, [%0] %1\n\t"
++ B2B_INLINE_DOUBLE_NOP
++ : : "r"(0x400),
+ "i"(ASI_LEON_MMUFLUSH) : "memory");
+ }
+
+diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
+index 4ab2e43e93a1..0d523aaf2c1c 100644
+--- a/arch/sparc/mm/srmmu.c
++++ b/arch/sparc/mm/srmmu.c
+@@ -26,6 +26,7 @@
+ #include <asm/mmu_context.h>
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
++#include <asm/asmmacro.h>
+ #include <asm/io-unit.h>
+ #include <asm/pgalloc.h>
+ #include <asm/pgtable.h>
+@@ -129,7 +130,9 @@ static void msi_set_sync(void)
+ {
+ __asm__ __volatile__ ("lda [%0] %1, %%g3\n\t"
+ "andn %%g3, %2, %%g3\n\t"
+- "sta %%g3, [%0] %1\n\t" : :
++ "sta %%g3, [%0] %1\n\t"
++ B2B_INLINE_DOUBLE_NOP
++ : :
+ "r" (MSI_MBUS_ARBEN),
+ "i" (ASI_M_CTL), "r" (MSI_ASYNC_MODE) : "g3");
+ }
+diff --git a/arch/sparc/mm/swift.S b/arch/sparc/mm/swift.S
+index f414bfd8d899..303b86ff3864 100644
+--- a/arch/sparc/mm/swift.S
++++ b/arch/sparc/mm/swift.S
+@@ -10,6 +10,7 @@
+ #include <asm/page.h>
+ #include <asm/pgtsrmmu.h>
+ #include <asm/asm-offsets.h>
++#include <asm/asmmacro.h>
+
+ .text
+ .align 4
+@@ -32,8 +33,9 @@ swift_flush_page_to_ram:
+ 1: subcc %o0, 0x10, %o0
+ add %o0, %o0, %o1
+ sta %g0, [%o0] ASI_M_DATAC_TAG
++ sta %g0, [%o1] ASI_M_TXTC_TAG
+ bne 1b
+- sta %g0, [%o1] ASI_M_TXTC_TAG
++ nop
+ retl
+ nop
+ #else
+@@ -46,8 +48,9 @@ swift_flush_cache_all:
+ sethi %hi(16 * 1024), %o0
+ 1: subcc %o0, 16, %o0
+ sta %g0, [%o0] ASI_M_TXTC_TAG
++ sta %g0, [%o0] ASI_M_DATAC_TAG
+ bne 1b
+- sta %g0, [%o0] ASI_M_DATAC_TAG
++ nop
+ retl
+ nop
+
+diff --git a/arch/sparc/mm/tsunami.S b/arch/sparc/mm/tsunami.S
+index 62b742df65dc..fc0c18e74408 100644
+--- a/arch/sparc/mm/tsunami.S
++++ b/arch/sparc/mm/tsunami.S
+@@ -11,6 +11,7 @@
+ #include <asm/asi.h>
+ #include <asm/page.h>
+ #include <asm/pgtsrmmu.h>
++#include <asm/asmmacro.h>
+
+ .text
+ .align 4
+@@ -81,8 +82,9 @@ tsunami_flush_tlb_page:
+ nop
+ nop
+ tsunami_flush_tlb_page_out:
++ sta %g5, [%g1] ASI_M_MMUREGS
+ retl
+- sta %g5, [%g1] ASI_M_MMUREGS
++ nop
+
+ #define MIRROR_BLOCK(dst, src, offset, t0, t1, t2, t3) \
+ ldd [src + offset + 0x18], t0; \
+diff --git a/arch/sparc/mm/viking.S b/arch/sparc/mm/viking.S
+index 48f062de7a7f..de913516fdeb 100644
+--- a/arch/sparc/mm/viking.S
++++ b/arch/sparc/mm/viking.S
+@@ -16,6 +16,7 @@
+ #include <asm/pgtable.h>
+ #include <asm/pgtsrmmu.h>
+ #include <asm/viking.h>
++#include <asm/asmmacro.h>
+
+ #ifdef CONFIG_SMP
+ .data
+@@ -99,8 +100,8 @@ viking_mxcc_flush_page:
+ sub %g3, MXCC_STREAM_SIZE, %g3
+ 6:
+ stda %g2, [%o2] ASI_M_MXCC
+- stda %g2, [%o3] ASI_M_MXCC
+ andncc %g3, PAGE_MASK, %g0
++ stda %g2, [%o3] ASI_M_MXCC
+ bne 6b
+ sub %g3, MXCC_STREAM_SIZE, %g3
+
+@@ -128,8 +129,9 @@ viking_flush_cache_out:
+
+ viking_flush_tlb_all:
+ mov 0x400, %g1
++ sta %g0, [%g1] ASI_M_FLUSH_PROBE
+ retl
+- sta %g0, [%g1] ASI_M_FLUSH_PROBE
++ nop
+
+ viking_flush_tlb_mm:
+ mov SRMMU_CTX_REG, %g1
+@@ -142,8 +144,9 @@ viking_flush_tlb_mm:
+ mov 0x300, %g2
+ sta %o1, [%g1] ASI_M_MMUREGS
+ sta %g0, [%g2] ASI_M_FLUSH_PROBE
++ sta %g5, [%g1] ASI_M_MMUREGS
+ retl
+- sta %g5, [%g1] ASI_M_MMUREGS
++ nop
+ #ifndef CONFIG_SMP
+ 1: retl
+ nop
+@@ -162,13 +165,14 @@ viking_flush_tlb_range:
+ sta %o3, [%g1] ASI_M_MMUREGS
+ and %o1, %o4, %o1
+ add %o1, 0x200, %o1
+- sta %g0, [%o1] ASI_M_FLUSH_PROBE
+-1: sub %o1, %o4, %o1
++1: sta %g0, [%o1] ASI_M_FLUSH_PROBE
++ sub %o1, %o4, %o1
+ cmp %o1, %o2
+- blu,a 1b
+- sta %g0, [%o1] ASI_M_FLUSH_PROBE
++ blu 1b
++ nop
++ sta %g5, [%g1] ASI_M_MMUREGS
+ retl
+- sta %g5, [%g1] ASI_M_MMUREGS
++ nop
+ #ifndef CONFIG_SMP
+ 2: retl
+ nop
+@@ -186,8 +190,9 @@ viking_flush_tlb_page:
+ and %o1, PAGE_MASK, %o1
+ sta %o3, [%g1] ASI_M_MMUREGS
+ sta %g0, [%o1] ASI_M_FLUSH_PROBE
++ sta %g5, [%g1] ASI_M_MMUREGS
+ retl
+- sta %g5, [%g1] ASI_M_MMUREGS
++ nop
+ #ifndef CONFIG_SMP
+ 1: retl
+ nop
+@@ -209,8 +214,9 @@ sun4dsmp_flush_tlb_all:
+ bne 2f
+ mov 0x400, %g1
+ sta %g0, [%g1] ASI_M_FLUSH_PROBE
++ stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
+ retl
+- stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
++ nop
+ 2: tst %g5
+ bne,a 2b
+ ldub [%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
+@@ -228,8 +234,9 @@ sun4dsmp_flush_tlb_mm:
+ sta %o1, [%g1] ASI_M_MMUREGS
+ sta %g0, [%g2] ASI_M_FLUSH_PROBE
+ sta %g5, [%g1] ASI_M_MMUREGS
++ stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
+ retl
+- stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
++ nop
+ 2: tst %g5
+ bne,a 2b
+ ldub [%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
+@@ -248,14 +255,15 @@ sun4dsmp_flush_tlb_range:
+ sta %o3, [%g1] ASI_M_MMUREGS
+ and %o1, %o4, %o1
+ add %o1, 0x200, %o1
+- sta %g0, [%o1] ASI_M_FLUSH_PROBE
+-2: sub %o1, %o4, %o1
++2: sta %g0, [%o1] ASI_M_FLUSH_PROBE
++ sub %o1, %o4, %o1
+ cmp %o1, %o2
+- blu,a 2b
+- sta %g0, [%o1] ASI_M_FLUSH_PROBE
++ blu 2b
++ nop
+ sta %g5, [%g1] ASI_M_MMUREGS
++ stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
+ retl
+- stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
++ nop
+ 3: tst %g5
+ bne,a 3b
+ ldub [%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
+@@ -274,8 +282,9 @@ sun4dsmp_flush_tlb_page:
+ sta %o3, [%g1] ASI_M_MMUREGS
+ sta %g0, [%o1] ASI_M_FLUSH_PROBE
+ sta %g5, [%g1] ASI_M_MMUREGS
++ stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
+ retl
+- stb %g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
++ nop
+ 2: tst %g5
+ bne,a 2b
+ ldub [%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
+--
+2.34.1
+
--- /dev/null
+From a09231243da4a9a57dfe680e0b349c227608b150 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Thu, 20 Aug 2020 15:58:01 +0200
+Subject: [PATCH 22/32] sparc32: leon: Remove optimization giving b2bst false
+ positives for the paranoid
+
+The -fno-jump-tables removes a lot of false positives, but is not known
+to actually be needed.
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
+index b63401ecbfbf..aa9d12c54e4b 100644
+--- a/arch/sparc/Makefile
++++ b/arch/sparc/Makefile
+@@ -41,7 +41,7 @@ KBUILD_CFLAGS += -Wa,-Av8
+ KBUILD_AFLAGS += -m32 -mcpu=$(SPARC_MCPU) -Wa,-Av8
+
+ ifeq ($(CONFIG_SPARC_LEON_FIX_UT700),y)
+-KBUILD_CFLAGS += -mfix-ut700
++KBUILD_CFLAGS += -mfix-ut700 -fno-jump-tables
+ KBUILD_AFLAGS += -mfix-ut700
+ endif
+
+--
+2.34.1
+
--- /dev/null
+From e9027970989cb0f08a83057731f99b260247700d Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Fri, 28 Aug 2020 09:44:36 +0200
+Subject: [PATCH 23/32] sparc32,leon: Add erratafix for TN-0010
+
+This fix relies upon a gcc version that sets the define
+__FIX_LEON3FT_TN0010 based on -mfix-ut700.
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/include/asm/asmmacro.h | 8 ++++++++
+ arch/sparc/include/asm/cmpxchg_32.h | 8 ++++++--
+ arch/sparc/include/asm/pgtable_32.h | 3 ++-
+ arch/sparc/include/asm/sbi.h | 3 ++-
+ arch/sparc/include/asm/spinlock_32.h | 7 +++++--
+ arch/sparc/kernel/leon_smp.c | 3 ++-
+ arch/sparc/kernel/sun4d_smp.c | 3 ++-
+ arch/sparc/kernel/sun4m_smp.c | 3 ++-
+ 8 files changed, 29 insertions(+), 9 deletions(-)
+
+diff --git a/arch/sparc/include/asm/asmmacro.h b/arch/sparc/include/asm/asmmacro.h
+index 687269d581d1..d1f894990f6a 100644
+--- a/arch/sparc/include/asm/asmmacro.h
++++ b/arch/sparc/include/asm/asmmacro.h
+@@ -55,4 +55,12 @@
+ #define B2B_INLINE_DOUBLE_NOP ""
+ #endif
+
++#ifdef __FIX_LEON3FT_TN0010
++#define TN0010_NOP nop;
++#define TN0010_INLINE_NOP "nop\n\t"
++#else
++#define TN0010_NOP
++#define TN0010_INLINE_NOP ""
++#endif
++
+ #endif /* !(_SPARC_ASMMACRO_H) */
+diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h
+index 906380661c29..03955ea70a26 100644
+--- a/arch/sparc/include/asm/cmpxchg_32.h
++++ b/arch/sparc/include/asm/cmpxchg_32.h
+@@ -14,9 +14,12 @@
+
+ #ifdef CONFIG_SPARC_LEON_CAS
+
++#include <asm/asmmacro.h>
++
+ static inline unsigned long xchg_u32(__volatile__ unsigned long *m, unsigned long val)
+ {
+- __asm__ __volatile__("swap [%2], %0"
++ __asm__ __volatile__(TN0010_INLINE_NOP
++ "swap [%2], %0"
+ : "=&r" (val)
+ : "0" (val), "r" (m)
+ : "memory");
+@@ -64,7 +67,8 @@ void __cmpxchg_called_with_bad_pointer(void);
+ static inline unsigned long
+ __cmpxchg_u32(volatile int *m, int old, int new)
+ {
+- __asm__ __volatile__("casa [%2] 0xb, %3, %0"
++ __asm__ __volatile__(TN0010_INLINE_NOP
++ "casa [%2] 0xb, %3, %0"
+ : "=&r" (new)
+ : "0" (new), "r" (m), "r" (old)
+ : "memory");
+diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
+index 632cdb959542..bfc87c0404b7 100644
+--- a/arch/sparc/include/asm/pgtable_32.h
++++ b/arch/sparc/include/asm/pgtable_32.h
+@@ -106,7 +106,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+ */
+ static inline unsigned long srmmu_swap(unsigned long *addr, unsigned long value)
+ {
+- __asm__ __volatile__("swap [%2], %0" :
++ __asm__ __volatile__(TN0010_INLINE_NOP
++ "swap [%2], %0" :
+ "=&r" (value) : "0" (value), "r" (addr) : "memory");
+ return value;
+ }
+diff --git a/arch/sparc/include/asm/sbi.h b/arch/sparc/include/asm/sbi.h
+index 49b4e0aa4689..12412ece961d 100644
+--- a/arch/sparc/include/asm/sbi.h
++++ b/arch/sparc/include/asm/sbi.h
+@@ -70,7 +70,8 @@ struct sbi_regs {
+
+ static inline int acquire_sbi(int devid, int mask)
+ {
+- __asm__ __volatile__ ("swapa [%2] %3, %0" :
++ __asm__ __volatile__ (TN0010_INLINE_NOP
++ "swapa [%2] %3, %0" :
+ "=r" (mask) :
+ "0" (mask),
+ "r" (ECSR_DEV_BASE(devid) | SBI_INTR_STATE),
+diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
+index adade4095cf2..44d3e2679138 100644
+--- a/arch/sparc/include/asm/spinlock_32.h
++++ b/arch/sparc/include/asm/spinlock_32.h
+@@ -19,6 +19,7 @@
+ static inline void arch_spin_lock(arch_spinlock_t *lock)
+ {
+ __asm__ __volatile__(
++ TN0010_INLINE_NOP
+ "\n1:\n\t"
+ "ldstub [%0], %%g2\n\t"
+ "orcc %%g2, 0x0, %%g0\n\t"
+@@ -39,7 +40,8 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
+ static inline int arch_spin_trylock(arch_spinlock_t *lock)
+ {
+ unsigned int result;
+- __asm__ __volatile__("ldstub [%1], %0"
++ __asm__ __volatile__(TN0010_INLINE_NOP
++ "ldstub [%1], %0"
+ : "=r" (result)
+ : "r" (lock)
+ : "memory");
+@@ -149,7 +151,8 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
+ {
+ unsigned int val;
+
+- __asm__ __volatile__("ldstub [%1 + 3], %0"
++ __asm__ __volatile__(TN0010_INLINE_NOP
++ "ldstub [%1 + 3], %0"
+ : "=r" (val)
+ : "r" (&rw->lock)
+ : "memory");
+diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
+index f726d950e347..1d337aaa25ac 100644
+--- a/arch/sparc/kernel/leon_smp.c
++++ b/arch/sparc/kernel/leon_smp.c
+@@ -63,7 +63,8 @@ int leon_ipi_irq = LEON3_IRQ_IPI_DEFAULT;
+ static inline unsigned long do_swap(volatile unsigned long *ptr,
+ unsigned long val)
+ {
+- __asm__ __volatile__("swapa [%2] %3, %0\n\t" : "=&r"(val)
++ __asm__ __volatile__(TN0010_INLINE_NOP
++ "swapa [%2] %3, %0\n\t" : "=&r"(val)
+ : "0"(val), "r"(ptr), "i"(ASI_LEON_DCACHE_MISS)
+ : "memory");
+ return val;
+diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
+index b06eaf4d1256..b7f343368ab3 100644
+--- a/arch/sparc/kernel/sun4d_smp.c
++++ b/arch/sparc/kernel/sun4d_smp.c
+@@ -33,7 +33,8 @@ static int smp_highest_cpu;
+
+ static inline unsigned long sun4d_swap(volatile unsigned long *ptr, unsigned long val)
+ {
+- __asm__ __volatile__("swap [%1], %0\n\t" :
++ __asm__ __volatile__(TN0010_INLINE_NOP
++ "swap [%1], %0\n\t" :
+ "=&r" (val), "=&r" (ptr) :
+ "0" (val), "1" (ptr));
+ return val;
+diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
+index 228a6527082d..73076d436cde 100644
+--- a/arch/sparc/kernel/sun4m_smp.c
++++ b/arch/sparc/kernel/sun4m_smp.c
+@@ -29,7 +29,8 @@
+ static inline unsigned long
+ swap_ulong(volatile unsigned long *ptr, unsigned long val)
+ {
+- __asm__ __volatile__("swap [%1], %0\n\t" :
++ __asm__ __volatile__(TN0010_INLINE_NOP
++ "swap [%1], %0\n\t" :
+ "=&r" (val), "=&r" (ptr) :
+ "0" (val), "1" (ptr));
+ return val;
+--
+2.34.1
+
--- /dev/null
+From 19556adb703d521c012713135e4c1d910086828a Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Wed, 2 Sep 2020 11:47:33 +0200
+Subject: [PATCH 24/32] sparc32,leon: Add errata fix for the TN-0018 errata
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/Kconfig | 7 ++++
+ arch/sparc/include/asm/head_32.h | 10 ++++++
+ arch/sparc/include/asm/leon.h | 59 ++++++++++++++++++++++++++++++++
+ arch/sparc/kernel/entry.S | 35 +++++++++++++++++--
+ arch/sparc/kernel/head_32.S | 1 +
+ arch/sparc/kernel/rtrap_32.S | 5 +++
+ 6 files changed, 115 insertions(+), 2 deletions(-)
+
+diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
+index 73de59d47d47..c15672afbd50 100644
+--- a/arch/sparc/Kconfig
++++ b/arch/sparc/Kconfig
+@@ -397,11 +397,18 @@ config SPARC_LEON
+ if SPARC_LEON
+ config SPARC_LEON_FIX_UT700
+ bool "UT700 errata fixes"
++ select SPARC_LEON_FIX_TN0018
+ default n
+ help
+ If you say Y here, errata fixes will be included for UT700 errata.
+ If unsure, say N.
+
++config SPARC_LEON_FIX_TN0018
++ bool "Errata fix for the TN-0018 errata"
++ default n
++ help
++ If you say Y here, errata fixes will be included for the TN-0018 errata.
++
+ config SPARC_LEON_CAS
+ bool "Use Compare and Swap"
+ default y
+diff --git a/arch/sparc/include/asm/head_32.h b/arch/sparc/include/asm/head_32.h
+index d2809c859d0c..f6832d36d7ae 100644
+--- a/arch/sparc/include/asm/head_32.h
++++ b/arch/sparc/include/asm/head_32.h
+@@ -21,8 +21,13 @@
+ rd %psr, %l0; mov num, %l7; b bad_trap_handler; rd %wim, %l3;
+
+ /* This is for traps when we want just skip the instruction which caused it */
++#ifdef CONFIG_SPARC_LEON_FIX_TN0018
++#define SKIP_TRAP(type, name) \
++ TRAP_ENTRY(type, skip_trap_tn0018)
++#else
+ #define SKIP_TRAP(type, name) \
+ jmpl %l2, %g0; rett %l2 + 4; nop; nop;
++#endif
+
+ /* Notice that for the system calls we pull a trick. We load up a
+ * different pointer to the system call vector table in %l7, but call
+@@ -63,8 +68,13 @@
+ b setcc_trap_handler; rd %psr, %l0; nop; nop;
+
+ /* The Get PSR software trap for userland. */
++#ifdef CONFIG_SPARC_LEON_FIX_TN0018
++#define GETPSR_TRAP \
++ b getpsr_trap_handler_tn0018; rd %psr, %i0; nop; nop;
++#else
+ #define GETPSR_TRAP \
+ rd %psr, %i0; jmp %l2; rett %l2 + 4; nop;
++#endif
+
+ /* This is for hard interrupts from level 1-14, 15 is non-maskable (nmi) and
+ * gets handled with another macro.
+diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h
+index 143c06f8c7bc..9d5485c6dd1f 100644
+--- a/arch/sparc/include/asm/leon.h
++++ b/arch/sparc/include/asm/leon.h
+@@ -58,6 +58,8 @@
+ #define ASI_LEON3_SYSCTRL_DCFG 0x0c
+ #define ASI_LEON3_SYSCTRL_CFG_SNOOPING (1 << 27)
+ #define ASI_LEON3_SYSCTRL_CFG_SSIZE(c) (1 << ((c >> 20) & 0xf))
++#define ASI_LEON3_SYSCTRL_CTRL_IP_BIT 15
++#define ASI_LEON3_SYSCTRL_CTRL_ICS (0x3 << 0)
+
+ #ifndef __ASSEMBLY__
+
+@@ -272,4 +274,61 @@ extern int leon_ipi_irq;
+ * 64Kbytes by the Host controller.
+ */
+
++/*
++ * TN-0018 errata fix macros. Needs to be here due to otherwise circular
++ * dependencies between leon.h and asmmacro.h (that otherwise contains errata
++ * fix macros for assembly).
++ */
++#ifdef CONFIG_SPARC_LEON_FIX_TN0018
++/*
++ * l3: (out) original cctrl
++ * l4: (out) original cctrl with ics=0
++ * NOTE: This macro modifies psr.icc.
++ */
++#define TN0018_WAIT_IFLUSH \
++ 1: \
++ lda [%g0] ASI_LEON3_SYSCTRL, %l3; \
++ srl %l3, ASI_LEON3_SYSCTRL_CTRL_IP_BIT, %l4; \
++ andcc %l4, 1, %g0; \
++ bne 1b; \
++ andn %l3, ASI_LEON3_SYSCTRL_CTRL_ICS, %l4;
++
++
++#define TN0018_WRITE_PSR(psrsrc) \
++ wr %psrsrc, %psr; \
++ WRITE_PAUSE
++
++/*
++ * l3: (in) original cctrl
++ * l4: (in) original cctrl with ics=0
++ * NOTE: This macro MUST be immediately followed by the "jmp;rett" pair.
++ */
++#define TN0018_FIX \
++ /* align the sta for performance */ \
++ .align 0x20; \
++ /* disable icache */ \
++ sta %l4, [%g0] ASI_LEON3_SYSCTRL; \
++ /* delay for sta to have effect on rett */ \
++ nop; \
++ /* delay + catch rf parity error on l1 */ \
++ or %l1, %l1, %l1; \
++ /* delay + catch rf parity error on l2 */ \
++ or %l2, %l2, %l2; \
++ /* re-enable icache after rett */ \
++ sta %l3, [%g0] ASI_LEON3_SYSCTRL; \
++ /* delay ensures insn after gets cached */ \
++ nop;
++
++#define TN0018_FULL_FIX(psrsrc) \
++ TN0018_WAIT_IFLUSH \
++ TN0018_WRITE_PSR(psrsrc) \
++ TN0018_FIX
++
++#else
++#define TN0018_WAIT_IFLUSH
++#define TN0018_WRITE_PSR(psrsrc)
++#define TN0018_FIX
++#define TN0018_FULL_FIX(psrsrc)
++#endif /* CONFIG_SPARC_LEON_FIX_TN0018 */
++
+ #endif
+diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
+index c0fdf1de10f2..66a11fc3dc30 100644
+--- a/arch/sparc/kernel/entry.S
++++ b/arch/sparc/kernel/entry.S
+@@ -31,6 +31,7 @@
+
+ #include <asm/asmmacro.h>
+ #include <asm/export.h>
++#include <asm/leon.h>
+
+ #define curptr g6
+
+@@ -159,10 +160,12 @@ floppy_fifo_emptied:
+ B2B_SINGLE_NOP
+ st %l6, [%l7 + %lo(pdma_size)]
+
++ TN0018_WAIT_IFLUSH
+ /* Restore condition codes */
+ wr %l0, 0x0, %psr
+ WRITE_PAUSE
+
++ TN0018_FIX
+ jmp %l1
+ rett %l2
+
+@@ -460,6 +463,7 @@ bad_instruction:
+ RESTORE_ALL
+
+ 1: /* unimplemented flush - just skip */
++ TN0018_FULL_FIX(l0)
+ jmpl %l2, %g0
+ rett %l2 + 4
+
+@@ -542,6 +546,7 @@ fpe_trap_handler:
+ or %l5, %lo(fpsave_catch2), %l5
+ wr %l0, 0x0, %psr
+ WRITE_PAUSE
++ /* TN0018 note: We know we return to stfsr */
+ jmp %l5
+ rett %l5 + 4
+ 1:
+@@ -549,6 +554,7 @@ fpe_trap_handler:
+ or %l5, %lo(fpsave_catch), %l5
+ wr %l0, 0x0, %psr
+ WRITE_PAUSE
++ /* TN0018 note: We know we return to stfsr */
+ jmp %l5
+ rett %l5 + 4
+
+@@ -715,6 +721,8 @@ flush_patch_one:
+ getcc_trap_handler:
+ srl %l0, 20, %g1 ! give user
+ and %g1, 0xf, %g1 ! only ICC bits in %psr
++
++ TN0018_FULL_FIX(l0)
+ jmp %l2 ! advance over trap instruction
+ rett %l2 + 0x4 ! like this...
+
+@@ -730,14 +738,37 @@ setcc_trap_handler:
+ set PSR_ICC, %l5
+ andn %l0, %l5, %l0 ! clear ICC bits in %psr
+ and %l4, %l5, %l4 ! clear non-ICC bits in user value
+- or %l4, %l0, %l4 ! or them in... mix mix mix
++ or %l4, %l0, %l5 ! or them in... mix mix mix
++
++ TN0018_WAIT_IFLUSH
+
+- wr %l4, 0x0, %psr ! set new %psr
++ wr %l5, 0x0, %psr ! set new %psr
+ WRITE_PAUSE ! TI scumbags...
+
++ TN0018_FIX
+ jmp %l2 ! advance over trap instruction
+ rett %l2 + 0x4 ! like this...
+
++#ifdef CONFIG_SPARC_LEON_FIX_TN0018
++ /* The getpsr software trap TN0018 version.
++ * The user wants the psr in %o0, established in %i0 in trap table.
++ */
++
++ .align 4
++ .globl getpsr_trap_handler_tn0018
++getpsr_trap_handler_tn0018:
++ TN0018_FULL_FIX(i0)
++ jmp %l2
++ rett %l2 + 0x4
++
++ .align 4
++ .globl skip_trap_tn0018
++skip_trap_tn0018:
++ TN0018_FULL_FIX(l0)
++ jmp %l2
++ rett %l2 + 0x4
++#endif
++
+ sun4m_nmi_error:
+ /* NMI async memory error handling. */
+ sethi %hi(0x80000000), %l4
+diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
+index cdff4d974434..f769e2617f7e 100644
+--- a/arch/sparc/kernel/head_32.S
++++ b/arch/sparc/kernel/head_32.S
+@@ -27,6 +27,7 @@
+ #include <asm/pgtable.h> /* PGDIR_SHIFT */
+ #include <asm/export.h>
+ #include <asm/asmmacro.h>
++#include <asm/leon.h>
+
+ .data
+ /* The following are used with the prom_vector node-ops to figure out
+diff --git a/arch/sparc/kernel/rtrap_32.S b/arch/sparc/kernel/rtrap_32.S
+index 8931fe266346..9b6978e5fd67 100644
+--- a/arch/sparc/kernel/rtrap_32.S
++++ b/arch/sparc/kernel/rtrap_32.S
+@@ -14,6 +14,7 @@
+ #include <asm/winmacro.h>
+ #include <asm/asmmacro.h>
+ #include <asm/thread_info.h>
++#include <asm/leon.h>
+
+ #define t_psr l0
+ #define t_pc l1
+@@ -151,9 +152,11 @@ ret_trap_userwins_ok:
+ LOAD_PT_YREG(sp, g1)
+ LOAD_PT_GLOBALS(sp)
+
++ TN0018_WAIT_IFLUSH
+ wr %t_psr, 0x0, %psr
+ WRITE_PAUSE
+
++ TN0018_FIX
+ jmp %t_pc
+ rett %t_npc
+
+@@ -207,9 +210,11 @@ rtrap_patch5: and %g1, 0xff, %g1
+ 2:
+ sethi %hi(PSR_SYSCALL), %twin_tmp1
+ andn %t_psr, %twin_tmp1, %t_psr
++ TN0018_WAIT_IFLUSH
+ wr %t_psr, 0x0, %psr
+ WRITE_PAUSE
+
++ TN0018_FIX
+ jmp %t_pc
+ rett %t_npc
+
+--
+2.34.1
+
--- /dev/null
+From 6104175c0a7784c5cf333e2d9566db32e0fe676a Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Wed, 13 Oct 2021 10:50:14 +0200
+Subject: [PATCH 25/32] sparc32,leon: Support futex atomic operations on SMP
+ using CAS
+
+This is needed for priority inheritance support. Previously futex
+atomic operations only worked for !SMP. With CAS enabled, now CAS is
+used regardless of being SMP or not.
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/Kconfig | 1 +
+ arch/sparc/include/asm/futex_32.h | 107 +++++++++++++++++++++++++++++-
+ 2 files changed, 107 insertions(+), 1 deletion(-)
+
+diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
+index c15672afbd50..a9d618c6f6c1 100644
+--- a/arch/sparc/Kconfig
++++ b/arch/sparc/Kconfig
+@@ -411,6 +411,7 @@ config SPARC_LEON_FIX_TN0018
+
+ config SPARC_LEON_CAS
+ bool "Use Compare and Swap"
++ select HAVE_FUTEX_CMPXCHG if FUTEX
+ default y
+ help
+ If you say Y here the kernel will use the CASA instruction. Enable
+diff --git a/arch/sparc/include/asm/futex_32.h b/arch/sparc/include/asm/futex_32.h
+index 6a332a9f099c..992a1f0e8f1c 100644
+--- a/arch/sparc/include/asm/futex_32.h
++++ b/arch/sparc/include/asm/futex_32.h
+@@ -1,6 +1,111 @@
+ #ifndef _ASM_FUTEX_H
+ #define _ASM_FUTEX_H
+
+-#include <asm-generic/futex.h>
++#ifdef CONFIG_SPARC_LEON_CAS
++
++#include <linux/futex.h>
++#include <linux/uaccess.h>
++#include <asm/errno.h>
++
++#define __futex_cas_op(insn, ret, oldval, uaddr, oparg) \
++ __asm__ __volatile__( \
++ "\n1: lda [%3] 0xa, %2\n" \
++ " " insn "\n" \
++ "2: casa [%3] 0xa, %2, %1\n" \
++ " cmp %2, %1\n" \
++ " bne 1b\n" \
++ " mov 0, %0\n" \
++ "3:\n" \
++ " .section .fixup,#alloc,#execinstr\n" \
++ " .align 4\n" \
++ "4: sethi %%hi(3b), %0\n" \
++ " jmpl %0 + %%lo(3b), %%g0\n" \
++ " mov %5, %0\n" \
++ " .previous\n" \
++ " .section __ex_table,\"a\"\n" \
++ " .align 4\n" \
++ " .word 1b, 4b\n" \
++ " .word 2b, 4b\n" \
++ " .previous\n" \
++ : "=&r" (ret), "=&r" (oldval), "=&r" (tem) \
++ : "r" (uaddr), "r" (oparg), "i" (-EFAULT) \
++ : "memory")
++
++static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
++ u32 __user *uaddr)
++{
++ int oldval = 0, ret, tem;
++
++ if (unlikely((((unsigned long) uaddr) & 0x3UL)))
++ return -EINVAL;
++
++ /*
++ * Not all LEONs get traps as if in user mode when using ASI 0xa in
++ * supervisor mode, so check explicitly.
++ */
++ if (unlikely(!access_ok(uaddr, 4)))
++ return -EFAULT;
++
++ switch (op) {
++ case FUTEX_OP_SET:
++ __futex_cas_op("mov\t%4, %1", ret, oldval, uaddr, oparg);
++ break;
++ case FUTEX_OP_ADD:
++ __futex_cas_op("add\t%2, %4, %1", ret, oldval, uaddr, oparg);
++ break;
++ case FUTEX_OP_OR:
++ __futex_cas_op("or\t%2, %4, %1", ret, oldval, uaddr, oparg);
++ break;
++ case FUTEX_OP_ANDN:
++ __futex_cas_op("andn\t%2, %4, %1", ret, oldval, uaddr, oparg);
++ break;
++ case FUTEX_OP_XOR:
++ __futex_cas_op("xor\t%2, %4, %1", ret, oldval, uaddr, oparg);
++ break;
++ default:
++ ret = -ENOSYS;
++ }
+
++ if (!ret)
++ *oval = oldval;
++
++ return ret;
++}
++
++static inline int
++futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
++ u32 oldval, u32 newval)
++{
++ int ret = 0;
++
++ /*
++ * Not all LEONs get traps as if in user mode when using ASI 0xa in
++ * supervisor mode, so check explicitly.
++ */
++ if (unlikely(!access_ok(uaddr, 4)))
++ return -EFAULT;
++
++ __asm__ __volatile__(
++ "\n1: casa [%4] 0xb, %3, %1\n"
++ "2:\n"
++ " .section .fixup,#alloc,#execinstr\n"
++ " .align 4\n"
++ "3: sethi %%hi(2b), %0\n"
++ " jmpl %0 + %%lo(2b), %%g0\n"
++ " mov %5, %0\n"
++ " .previous\n"
++ " .section __ex_table,\"a\"\n"
++ " .align 4\n"
++ " .word 1b, 3b\n"
++ " .previous\n"
++ : "+r" (ret), "=r" (newval)
++ : "1" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT)
++ : "memory");
++
++ *uval = newval;
++ return ret;
++}
++#else /* CONFIG_SPARC_LEON_CAS */
++#include <asm-generic/futex.h>
++#endif
+ #endif
+--
+2.34.1
+
--- /dev/null
+From d4a7cb9b73160cc6d7990d14bdce5e6a5cd141f1 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Wed, 22 Dec 2021 14:39:54 +0100
+Subject: [PATCH 26/32] phy/micrel: Add phy_id matching for KSZ9021GN
+
+KSZ9021GN has phy_id 0x00221612, which has not been matched to KSZ9021
+since commit 48d7d0ad9022 ("phy/micrel: change phy_id_mask for KSZ9021
+and KS8001") made the mask too strict for that.
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ drivers/net/phy/micrel.c | 4 ++--
+ include/linux/micrel_phy.h | 1 +
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
+index bbbe198f83e8..f879d64623a6 100644
+--- a/drivers/net/phy/micrel.c
++++ b/drivers/net/phy/micrel.c
+@@ -1292,7 +1292,7 @@ static struct phy_driver ksphy_driver[] = {
+ .resume = genphy_resume,
+ }, {
+ .phy_id = PHY_ID_KSZ9021,
+- .phy_id_mask = 0x000ffffe,
++ .phy_id_mask = 0x000ffffc,
+ .name = "Micrel KSZ9021 Gigabit PHY",
+ /* PHY_GBIT_FEATURES */
+ .driver_data = &ksz9021_type,
+@@ -1396,7 +1396,7 @@ MODULE_AUTHOR("David J. Choi");
+ MODULE_LICENSE("GPL");
+
+ static struct mdio_device_id __maybe_unused micrel_tbl[] = {
+- { PHY_ID_KSZ9021, 0x000ffffe },
++ { PHY_ID_KSZ9021, 0x000ffffc },
+ { PHY_ID_KSZ9031, MICREL_PHY_ID_MASK },
+ { PHY_ID_KSZ9131, MICREL_PHY_ID_MASK },
+ { PHY_ID_KSZ8001, 0x00fffffc },
+diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
+index 416ee6dd2574..f2cc258bed1a 100644
+--- a/include/linux/micrel_phy.h
++++ b/include/linux/micrel_phy.h
+@@ -13,6 +13,7 @@
+ #define PHY_ID_KSZ8873MLL 0x000e7237
+ #define PHY_ID_KSZ9021 0x00221610
+ #define PHY_ID_KSZ9021RLRN 0x00221611
++#define PHY_ID_KSZ9021GN 0x00221612
+ #define PHY_ID_KS8737 0x00221720
+ #define PHY_ID_KSZ8021 0x00221555
+ #define PHY_ID_KSZ8031 0x00221556
+--
+2.34.1
+
--- /dev/null
+From af2f602883c3f7863466e83c96580dc02d128414 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Mon, 30 May 2022 16:31:35 +0200
+Subject: [PATCH 27/32] sparc32,leon: Add leon_defconfig
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/configs/leon_defconfig | 118 ++++++++++++++++++++++++++++++
+ 1 file changed, 118 insertions(+)
+ create mode 100644 arch/sparc/configs/leon_defconfig
+
+diff --git a/arch/sparc/configs/leon_defconfig b/arch/sparc/configs/leon_defconfig
+new file mode 100644
+index 000000000000..08ba7c8327d7
+--- /dev/null
++++ b/arch/sparc/configs/leon_defconfig
+@@ -0,0 +1,118 @@
++CONFIG_DEFAULT_HOSTNAME="leon"
++CONFIG_SYSVIPC=y
++CONFIG_LOG_BUF_SHIFT=14
++CONFIG_BLK_DEV_INITRD=y
++# CONFIG_RD_BZIP2 is not set
++# CONFIG_RD_LZMA is not set
++# CONFIG_RD_XZ is not set
++# CONFIG_RD_LZO is not set
++# CONFIG_RD_LZ4 is not set
++# CONFIG_RD_ZSTD is not set
++# CONFIG_FHANDLE is not set
++CONFIG_EMBEDDED=y
++CONFIG_SLAB=y
++CONFIG_SMP=y
++CONFIG_HZ_100=y
++CONFIG_SPARC_LEON=y
++CONFIG_SUN_OPENPROMFS=y
++CONFIG_SUN_OPENPROMIO=y
++# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
++CONFIG_BINFMT_MISC=y
++# CONFIG_COMPACTION is not set
++CONFIG_NET=y
++CONFIG_PACKET=y
++CONFIG_UNIX=y
++CONFIG_INET=y
++CONFIG_IP_PNP=y
++# CONFIG_WIRELESS is not set
++CONFIG_UEVENT_HELPER=y
++CONFIG_DEVTMPFS=y
++CONFIG_DEVTMPFS_MOUNT=y
++CONFIG_NETDEVICES=y
++CONFIG_GRETH=y
++# CONFIG_NET_VENDOR_ALACRITECH is not set
++# CONFIG_NET_VENDOR_AMAZON is not set
++# CONFIG_NET_VENDOR_AMD is not set
++# CONFIG_NET_VENDOR_AQUANTIA is not set
++# CONFIG_NET_VENDOR_ARC is not set
++# CONFIG_NET_VENDOR_AURORA is not set
++# CONFIG_NET_VENDOR_BROADCOM is not set
++# CONFIG_NET_VENDOR_CADENCE is not set
++# CONFIG_NET_VENDOR_CAVIUM is not set
++# CONFIG_NET_VENDOR_CORTINA is not set
++# CONFIG_NET_VENDOR_EZCHIP is not set
++# CONFIG_NET_VENDOR_GOOGLE is not set
++# CONFIG_NET_VENDOR_HUAWEI is not set
++# CONFIG_NET_VENDOR_INTEL is not set
++# CONFIG_NET_VENDOR_MARVELL is not set
++# CONFIG_NET_VENDOR_MICREL is not set
++# CONFIG_NET_VENDOR_MICROCHIP is not set
++# CONFIG_NET_VENDOR_MICROSEMI is not set
++# CONFIG_NET_VENDOR_NATSEMI is not set
++# CONFIG_NET_VENDOR_NETRONOME is not set
++# CONFIG_NET_VENDOR_NI is not set
++# CONFIG_NET_VENDOR_PENSANDO is not set
++# CONFIG_NET_VENDOR_QUALCOMM is not set
++# CONFIG_NET_VENDOR_RENESAS is not set
++# CONFIG_NET_VENDOR_ROCKER is not set
++# CONFIG_NET_VENDOR_SAMSUNG is not set
++# CONFIG_NET_VENDOR_SEEQ is not set
++# CONFIG_NET_VENDOR_SOLARFLARE is not set
++# CONFIG_NET_VENDOR_SOCIONEXT is not set
++# CONFIG_NET_VENDOR_STMICRO is not set
++# CONFIG_NET_VENDOR_SUN is not set
++# CONFIG_NET_VENDOR_SYNOPSYS is not set
++# CONFIG_NET_VENDOR_VIA is not set
++# CONFIG_NET_VENDOR_WIZNET is not set
++# CONFIG_NET_VENDOR_XILINX is not set
++CONFIG_BROADCOM_PHY=y
++CONFIG_CICADA_PHY=y
++CONFIG_DAVICOM_PHY=y
++CONFIG_ICPLUS_PHY=y
++CONFIG_LXT_PHY=y
++CONFIG_LSI_ET1011C_PHY=y
++CONFIG_MARVELL_PHY=y
++CONFIG_MICREL_PHY=y
++CONFIG_NATIONAL_PHY=y
++CONFIG_QSEMI_PHY=y
++CONFIG_REALTEK_PHY=y
++CONFIG_SMSC_PHY=y
++CONFIG_STE10XP=y
++CONFIG_VITESSE_PHY=y
++# CONFIG_WLAN is not set
++# CONFIG_INPUT_KEYBOARD is not set
++# CONFIG_INPUT_MOUSE is not set
++# CONFIG_SERIO_I8042 is not set
++CONFIG_SERIAL_SUNZILOG=y
++CONFIG_SERIAL_SUNZILOG_CONSOLE=y
++CONFIG_SERIAL_GRLIB_GAISLER_APBUART=y
++CONFIG_SERIAL_GRLIB_GAISLER_APBUART_CONSOLE=y
++# CONFIG_HW_RANDOM is not set
++CONFIG_DEVKMEM=y
++# CONFIG_HWMON is not set
++# CONFIG_HID_GENERIC is not set
++# CONFIG_USB_SUPPORT is not set
++# CONFIG_RTC_HCTOSYS is not set
++# CONFIG_RTC_INTF_SYSFS is not set
++# CONFIG_RTC_INTF_PROC is not set
++# CONFIG_RTC_INTF_DEV is not set
++# CONFIG_VIRTIO_MENU is not set
++# CONFIG_VHOST_MENU is not set
++# CONFIG_IOMMU_SUPPORT is not set
++CONFIG_AUTOFS4_FS=y
++CONFIG_FSCACHE=y
++CONFIG_PROC_KCORE=y
++CONFIG_TMPFS=y
++# CONFIG_MISC_FILESYSTEMS is not set
++CONFIG_NFS_FS=y
++CONFIG_NFS_V4=y
++CONFIG_ROOT_NFS=y
++CONFIG_NFS_FSCACHE=y
++CONFIG_NLS=y
++CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF4=y
++CONFIG_MAGIC_SYSRQ=y
++CONFIG_DEBUG_FS=y
++CONFIG_DETECT_HUNG_TASK=y
++# CONFIG_SCHED_DEBUG is not set
++# CONFIG_RUNTIME_TESTING_MENU is not set
+--
+2.34.1
+
--- /dev/null
+From b04fd686298fdb90d617a9bb07d3c8dd3d0962f8 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Thu, 5 Jan 2023 16:57:29 +0100
+Subject: [PATCH 28/32] sparc32,leon: Make what -mcpu to be used configurable,
+ defaulting to none
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/Kconfig | 32 ++++++++++++++++++++++++++++++++
+ arch/sparc/Makefile | 13 +++++++++----
+ 2 files changed, 41 insertions(+), 4 deletions(-)
+
+diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
+index a9d618c6f6c1..8188cae9786f 100644
+--- a/arch/sparc/Kconfig
++++ b/arch/sparc/Kconfig
+@@ -395,6 +395,38 @@ config SPARC_LEON
+ toolchain at www.gaisler.com.
+
+ if SPARC_LEON
++
++choice
++ prompt "LEON CPU architecture"
++ default SPARC_LEON_MCPU_DEFAULT
++ help
++ Selects which -mcpu option, if any, is passed to the compiler
++ when building the kernel.
++
++config SPARC_LEON_MCPU_DEFAULT
++ bool "default"
++ help
++ Build the kernel with no -mcpu option, getting the default
++ for the toolchain that is being used.
++
++config SPARC_LEON_MCPU_LEON3
++ bool "leon3"
++ help
++ Build the kernel with -mcpu=leon3.
++
++config SPARC_LEON_MCPU_LEON5
++ bool "leon5"
++ help
++ Build the kernel with -mcpu=leon5.
++
++endchoice
++
++config SPARC_LEON_MCPU
++ string
++ default "" if SPARC_LEON_MCPU_DEFAULT
++ default "leon3" if SPARC_LEON_MCPU_LEON3
++ default "leon5" if SPARC_LEON_MCPU_LEON5
++
+ config SPARC_LEON_FIX_UT700
+ bool "UT700 errata fixes"
+ select SPARC_LEON_FIX_TN0018
+diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
+index aa9d12c54e4b..63894edc86c6 100644
+--- a/arch/sparc/Makefile
++++ b/arch/sparc/Makefile
+@@ -26,19 +26,24 @@ export BITS := 32
+ UTS_MACHINE := sparc
+
+ ifeq ($(CONFIG_SPARC_LEON),y)
+-SPARC_MCPU=leon3
++leon_mcpu := $(strip $(shell echo $(CONFIG_SPARC_LEON_MCPU)))
++ifeq ($(leon_mcpu),)
++SPARC_MCPU=
+ else
+-SPARC_MCPU=v8
++SPARC_MCPU=-mcpu=$(leon_mcpu)
++endif
++else
++SPARC_MCPU=-mcpu=v8
+ endif
+
+ # We are adding -Wa,-Av8 to KBUILD_CFLAGS to deal with a specs bug in some
+ # versions of gcc. Some gcc versions won't pass -Av8 to binutils when you
+ # give -mcpu=v8. This silently worked with older bintutils versions but
+ # does not any more.
+-KBUILD_CFLAGS += -m32 -mcpu=$(SPARC_MCPU) -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
++KBUILD_CFLAGS += -m32 $(SPARC_MCPU) -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
+ KBUILD_CFLAGS += -Wa,-Av8
+
+-KBUILD_AFLAGS += -m32 -mcpu=$(SPARC_MCPU) -Wa,-Av8
++KBUILD_AFLAGS += -m32 $(SPARC_MCPU) -Wa,-Av8
+
+ ifeq ($(CONFIG_SPARC_LEON_FIX_UT700),y)
+ KBUILD_CFLAGS += -mfix-ut700 -fno-jump-tables
+--
+2.34.1
+
--- /dev/null
+From e3848e4e1e8a57462b0b2990e1a6ef96d55306bf Mon Sep 17 00:00:00 2001
+From: Eneli Elbing <eneli.elbing@gaisler.com>
+Date: Wed, 11 Jan 2023 10:53:33 +0100
+Subject: [PATCH 29/32] drivers/perf: Add driver for L4STAT in GR740
+ configuration
+
+The LEON4 Statistics Unit (L4STAT) is used to count events in the LEON4
+processors and on the processor AHB bus. The driver supports L4STAT on
+GR740. Performance statistics can be gathered using the perf stat
+command.
+
+Signed-off-by: Eneli Elbing <eneli.elbing@gaisler.com>
+---
+ Documentation/admin-guide/perf/index.rst | 1 +
+ Documentation/admin-guide/perf/l4stat_pmu.rst | 118 +++
+ MAINTAINERS | 6 +
+ arch/sparc/Kconfig | 2 +-
+ arch/sparc/include/asm/perf_event.h | 28 +-
+ arch/sparc/include/asm/perf_event_32.h | 25 +
+ .../asm/{perf_event.h => perf_event_64.h} | 4 +-
+ drivers/perf/Kconfig | 10 +
+ drivers/perf/Makefile | 1 +
+ drivers/perf/l4stat_pmu.c | 705 ++++++++++++++++++
+ 10 files changed, 873 insertions(+), 27 deletions(-)
+ create mode 100644 Documentation/admin-guide/perf/l4stat_pmu.rst
+ create mode 100644 arch/sparc/include/asm/perf_event_32.h
+ copy arch/sparc/include/asm/{perf_event.h => perf_event_64.h} (91%)
+ create mode 100644 drivers/perf/l4stat_pmu.c
+
+diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
+index 5a8f2529a033..e4487442b463 100644
+--- a/Documentation/admin-guide/perf/index.rst
++++ b/Documentation/admin-guide/perf/index.rst
+@@ -16,3 +16,4 @@ Performance monitor support
+ xgene-pmu
+ arm_dsu_pmu
+ thunderx2-pmu
++ l4stat_pmu
+diff --git a/Documentation/admin-guide/perf/l4stat_pmu.rst b/Documentation/admin-guide/perf/l4stat_pmu.rst
+new file mode 100644
+index 000000000000..acdf87435842
+--- /dev/null
++++ b/Documentation/admin-guide/perf/l4stat_pmu.rst
+@@ -0,0 +1,118 @@
++.. SPDX-License-Identifier: GPL-2.0+
++
++==================================
++LEON4 Statistics Unit - L4STAT PMU
++==================================
++
++The LEON4 Statistics Unit (L4STAT) is used to count events in the LEON4
++processors and on the processor AHB bus. The statistics unit has 16 hardware
++counters. In its current form, the driver supports L4STAT on GR740. The
++documentation of the L4STAT core on GR740 can be found in the GR740 Data Sheet
++and User’s Manual at https://www.gaisler.com/gr740
++
++PMU (perf) driver
++-----------------
++
++The L4STAT driver registers a single PMU device, see
++/sys/bus/event_source/devices/l4stat. Events are listed in the events/ directory
++and available configurations in the format/ directory. An event can be referred
++to either by its symbolic name or its numeric, hexadecimal value with the
++config/event parameter. The config1/ahbm parameter is used to set the CPU/AHBM
++field in the control register. The config2/su parameter is used to set the SU
++field. Both are 0 by default.
++
++Task-specific vs system-wide mode
++---------------------------------
++
++In task-specific mode, the counters only run on the CPUs that the task is
++currently scheduled on. In system-wide mode (-a), the counters run on all CPUs,
++regardless of which CPUs the task is scheduled on. The -A flag can be used for a
++per-CPU breakdown in system-wide mode. Per-CPU breakdown in task-specific mode
++is not supported.
++
++Processor events
++----------------
++
++The default behaviour is to count processor events on all CPUs. Non-CPU AHBMs
++can be specified with the config1/ahbm parameter. The config1/ahbm parameter is
++ignored for config1/ahbm <= 3, i.e. AHBMs that correspond to the CPUs, since
++they are measured by default.
++
++When specifying config1/ahbm > 3 and using the -a flag, it should be done in
++conjunction with specifying a single CPU with the -C option, otherwise a counter
++is started on every CPU and the result will be a multiple of the number of CPUs.
++The same result can be achieved by omitting the -a flag, i.e. using
++task-specific mode instead.
++
++AHB and device-specific (external/user-defined) events
++------------------------------------------------------
++
++Filtering AHB and device-specific events is activated with the config2/su
++parameter. It is 0 by default, meaning that filtering is off, so the resulting
++counts are for all AHB masters in total, and the config1/ahbm parameter is
++ignored. Setting the config2/su parameter to 2 will result in behaviour similar
++to processor events, where config1/ahbm <= 3 is ignored and config1/ahbm > 3 is
++filtered. The config1/ahbm parameter is ignored in case of events that do not
++support CPU/AHBM filtering (0x63-0x6E). Setting config2/su to 3 will filter on
++any config1/ahbm, both CPU and non-CPU.
++
++Events generated from REQ/GNT signals
++-------------------------------------
++These events are active when an AHB master has request (REQ) asserted, while
++another AHB master has grant (GNT) asserted/deasserted. The REQ AHBM is set
++with the config1/ahbm parameter, while the GNT AHBM is set by the event ID.
++
++The upper hex digit of the event ID specifies whether GNT is asserted (8) or
++deasserted (9). The lower hex digit specifies the GNT AHBM as follows:
++
++* 8:6 - Masters 2, 1, 0 on memory AHB bus
++* 5:0 - Masters on Processor AHB bus
++
++Limitations
++-----------
++
++* Sampling is not supported.
++  This means that "perf record" and related commands will not work. Events can
++  be counted with "perf stat" (see example usage below).
++* Scaling is not supported.
++  This means that the number of events for a single run is limited by the
++  number of hardware counters, i.e. 16. When measuring events on CPU AHBMs,
++  each CPU takes up one counter, limiting the number of CPU events that can be
++  counted in parallel to four.
++
++Example usage
++-------------
++
++ List all available events
++ $# perf list
++
++ Some command formatting examples for counting the total number of instructions
++ $# perf stat -e proc_total_instructions sleep 1
++ $# perf stat -e l4stat/proc_total_instructions/ sleep 1
++ $# perf stat -e l4stat/event=0x11/ sleep 1
++ $# perf stat -e l4stat/config=0x11/ sleep 1
++
++ Some command formatting examples for specifying user and/or kernel space
++ (u/k/uk)
++ $# perf stat -e proc_total_instructions:u sleep 1
++ $# perf stat -e l4stat/proc_total_instructions/u sleep 1
++ $# perf stat -e l4stat/event=0x11,su=2/ sleep 1
++
++ Count L2 cache misses for CPU3 (AHB master 3)
++ $# perf stat -e l4stat/ext_l2cache_miss,ahbm=3/ sleep 1
++
++ Count AHB BUSY cycles for all AHB masters in total (SU is 0 by default)
++ $# perf stat -e l4stat/ahb_busy_cycles/ sleep 1
++
++ Count AHB BUSY cycles for AHB master 4 (IO Memory Management Unit)
++ $# perf stat -e l4stat/ahb_busy_cycles,ahbm=4,su=2/ sleep 1
++
++ Count events where master 2 on the processor AHB has REQ asserted and master 1
++ on the processor AHB has GNT asserted
++ $# perf stat -e l4stat/reqgnt_ahbm1_proc,ahbm=2/ sleep 1
++ $# perf stat -e l4stat/event=0x81,ahbm=2/ sleep 1
++
++ Count events where master 2 on the processor AHB has REQ asserted and master 1
++ on the processor AHB has GNT deasserted
++ $# perf stat -e l4stat/req_ahbm1_proc,ahbm=2/ sleep 1
++ $# perf stat -e l4stat/event=0x91,ahbm=2/ sleep 1
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 6c5efc4013ab..69a9bfa14bc2 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -9822,6 +9822,12 @@ S: Maintained
+ F: include/net/l3mdev.h
+ F: net/l3mdev
+
++L4STAT LEON4 STATISTICS UNIT DRIVER FOR GR740
++M: Eneli Elbing <eneli.elbing@gaisler.com>
++S: Maintained
++F: Documentation/admin-guide/perf/l4stat_pmu.rst
++F: drivers/perf/l4stat_pmu.c
++
+ L7 BPF FRAMEWORK
+ M: John Fastabend <john.fastabend@gmail.com>
+ M: Daniel Borkmann <daniel@iogearbox.net>
+diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
+index 8188cae9786f..b38222ba3ae7 100644
+--- a/arch/sparc/Kconfig
++++ b/arch/sparc/Kconfig
+@@ -52,6 +52,7 @@ config SPARC
+ select NEED_DMA_MAP_STATE
+ select NEED_SG_DMA_LENGTH
+ select SET_FS
++ select HAVE_PERF_EVENTS
+
+ config SPARC32
+ def_bool !64BIT
+@@ -82,7 +83,6 @@ config SPARC64
+ select RTC_DRV_BQ4802
+ select RTC_DRV_SUN4V
+ select RTC_DRV_STARFIRE
+- select HAVE_PERF_EVENTS
+ select PERF_USE_VMALLOC
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select HAVE_C_RECORDMCOUNT
+diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
+index c2aec0c7f4f5..083d34c1189f 100644
+--- a/arch/sparc/include/asm/perf_event.h
++++ b/arch/sparc/include/asm/perf_event.h
+@@ -2,29 +2,9 @@
+ #ifndef __ASM_SPARC_PERF_EVENT_H
+ #define __ASM_SPARC_PERF_EVENT_H
+
+-#ifdef CONFIG_PERF_EVENTS
+-#include <asm/ptrace.h>
+-
+-#define perf_arch_fetch_caller_regs(regs, ip) \
+-do { \
+- unsigned long _pstate, _asi, _pil, _i7, _fp; \
+- __asm__ __volatile__("rdpr %%pstate, %0\n\t" \
+- "rd %%asi, %1\n\t" \
+- "rdpr %%pil, %2\n\t" \
+- "mov %%i7, %3\n\t" \
+- "mov %%i6, %4\n\t" \
+- : "=r" (_pstate), \
+- "=r" (_asi), \
+- "=r" (_pil), \
+- "=r" (_i7), \
+- "=r" (_fp)); \
+- (regs)->tstate = (_pstate << 8) | \
+- (_asi << 24) | (_pil << 20); \
+- (regs)->tpc = (ip); \
+- (regs)->tnpc = (regs)->tpc + 4; \
+- (regs)->u_regs[UREG_I6] = _fp; \
+- (regs)->u_regs[UREG_I7] = _i7; \
+-} while (0)
++#if defined(__sparc__) && defined(__arch64__)
++#include <asm/perf_event_64.h>
++#else
++#include <asm/perf_event_32.h>
+ #endif
+-
+ #endif
+diff --git a/arch/sparc/include/asm/perf_event_32.h b/arch/sparc/include/asm/perf_event_32.h
+new file mode 100644
+index 000000000000..b300e05ba6eb
+--- /dev/null
++++ b/arch/sparc/include/asm/perf_event_32.h
+@@ -0,0 +1,25 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef SPARC_PERF_EVENT_H
++#define SPARC_PERF_EVENT_H
++
++#ifdef CONFIG_PERF_EVENTS
++#include <asm/ptrace.h>
++
++#define perf_arch_fetch_caller_regs(regs, ip) \
++do { \
++ unsigned long _psr, _i7, _fp; \
++ __asm__ __volatile__("rd %%psr, %0\n\t" \
++ "mov %%i7, %1\n\t" \
++ "mov %%i6, %2\n\t" \
++ : "=r" (_psr), \
++ "=r" (_i7), \
++ "=r" (_fp)); \
++ (regs)->psr = _psr; \
++ (regs)->pc = (ip); \
++ (regs)->npc = (regs)->pc + 4; \
++ (regs)->u_regs[UREG_I6] = _fp; \
++ (regs)->u_regs[UREG_I7] = _i7; \
++} while (0)
++#endif
++
++#endif
+diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event_64.h
+similarity index 91%
+copy from arch/sparc/include/asm/perf_event.h
+copy to arch/sparc/include/asm/perf_event_64.h
+index c2aec0c7f4f5..a6bbfe73d23c 100644
+--- a/arch/sparc/include/asm/perf_event.h
++++ b/arch/sparc/include/asm/perf_event_64.h
+@@ -1,6 +1,6 @@
+ /* SPDX-License-Identifier: GPL-2.0 */
+-#ifndef __ASM_SPARC_PERF_EVENT_H
+-#define __ASM_SPARC_PERF_EVENT_H
++#ifndef SPARC64_PERF_EVENT_H
++#define SPARC64_PERF_EVENT_H
+
+ #ifdef CONFIG_PERF_EVENTS
+ #include <asm/ptrace.h>
+diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
+index 130327ff0b0e..a53ca336c434 100644
+--- a/drivers/perf/Kconfig
++++ b/drivers/perf/Kconfig
+@@ -130,6 +130,16 @@ config ARM_SPE_PMU
+ Extension, which provides periodic sampling of operations in
+ the CPU pipeline and reports this via the perf AUX interface.
+
++config L4STAT_PMU
++ bool "Gaisler L4STAT statistics unit support for GR740"
++ depends on SPARC
++ help
++ Enable perf driver for Gaisler LEON4 Statistics Unit (L4STAT).
++
++ Adds support for counting events in the LEON4 processor
++ and on the AHB bus, in order to create performance statistics
++ for the GR740 SoC.
++
+ source "drivers/perf/hisilicon/Kconfig"
+
+ endmenu
+diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
+index 5365fd56f88f..b64542c520ef 100644
+--- a/drivers/perf/Makefile
++++ b/drivers/perf/Makefile
+@@ -13,3 +13,4 @@ obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
+ obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
+ obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
+ obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
++obj-$(CONFIG_L4STAT_PMU) += l4stat_pmu.o
+diff --git a/drivers/perf/l4stat_pmu.c b/drivers/perf/l4stat_pmu.c
+new file mode 100644
+index 000000000000..07ee732752cb
+--- /dev/null
++++ b/drivers/perf/l4stat_pmu.c
+@@ -0,0 +1,705 @@
++// SPDX-License-Identifier: GPL-2.0+
++/*
++ * PMU driver for Cobham Gaisler L4STAT on GR740.
++ *
++ * 2022 (c) Cobham Gaisler AB
++ *
++ * This driver supports L4STAT Statistical Unit cores available in the
++ * GRLIB VHDL IP core library.
++ *
++ * Full documentation of the L4STAT core on GR740 can be found in the GR740 Data
++ * Sheet and User’s Manual at https://www.gaisler.com/gr740
++ *
++ * Contributors:
++ * - Eneli Elbing <eneli.elbing@gaisler.com>
++ */
++
++#include <linux/kernel.h>
++#include <linux/perf_event.h>
++#include <linux/platform_device.h>
++
++#define DRV_NAME "l4stat"
++#define GR740_MAX_CPUID 3
++#define L4STAT_NUM_CNTRS 16
++
++#define SU_OPT1 0x2
++#define SU_OPT2 0x3
++
++#define CCTRL_SU_USER (0x2 << CCTRL_SU_BIT)
++#define CCTRL_SU_KERNEL (0x1 << CCTRL_SU_BIT)
++#define CCTRL_EN (0x1 << CCTRL_EN_BIT)
++
++#define CCTRL_SU_BIT 14
++#define CCTRL_EN_BIT 12
++#define CCTRL_EVENTID_BIT 4
++#define CCTRL_CPUAHBM_BIT 0
++
++#define L4STAT_CVAL 0x000
++#define L4STAT_CCTRL 0x080
++
++#define APB_ADDR_OFFSET(cntr_idx) (4 * (cntr_idx)) /* 4-byte stride per counter */
++
++#define PROC_EVENT_RANGE_START L4STAT_EVENT_ICACHE_MISS
++#define PROC_EVENT_RANGE_END L4STAT_EVENT_STORE_INSTRUCTIONS
++
++/*
++ * L4STAT processor events
++ */
++#define L4STAT_EVENT_ICACHE_MISS 0x00
++#define L4STAT_EVENT_IMMU_TLB_MISS 0x01
++#define L4STAT_EVENT_ICACHE_HOLD 0x02
++#define L4STAT_EVENT_IMMU_HOLD 0x03
++#define L4STAT_EVENT_DCACHE_MISS 0x08
++#define L4STAT_EVENT_DMMU_TLB_MISS 0x09
++#define L4STAT_EVENT_DCACHE_HOLD 0x0a
++#define L4STAT_EVENT_DMMU_HOLD 0x0b
++#define L4STAT_EVENT_DATA_WRITE_BUFFER_HOLD 0x10
++#define L4STAT_EVENT_TOTAL_INSTRUCTIONS 0x11
++#define L4STAT_EVENT_INT_INSTRUCTIONS 0x12
++#define L4STAT_EVENT_FPU_INSTRUCTIONS 0x13
++#define L4STAT_EVENT_BRANCH_PREDICTION_MISS 0x14
++#define L4STAT_EVENT_EXECUTION_TIME 0x15
++#define L4STAT_EVENT_AHB_UTILIZATION 0x17
++#define L4STAT_EVENT_AHB_TOTAL_UTILIZATION 0x18
++#define L4STAT_EVENT_INT_BRANCHES 0x22
++#define L4STAT_EVENT_CALL_INSTRUCTIONS 0x28
++#define L4STAT_EVENT_REGULAR_TYPE2_INSTRUCTIONS 0x30
++#define L4STAT_EVENT_LOAD_AND_STORE_INSTRUCTIONS 0x38
++#define L4STAT_EVENT_LOAD_INSTRUCTIONS 0x39
++#define L4STAT_EVENT_STORE_INSTRUCTIONS 0x3a
++
++/*
++ * L4STAT AHB events
++ * (counted via LEON4 Debug Support Unit)
++ */
++#define L4STAT_EVENT_AHB_IDLE_CYCLES 0x40
++#define L4STAT_EVENT_AHB_BUSY_CYCLES 0x41
++#define L4STAT_EVENT_AHB_NON_SEQ_TRANSFERS 0x42
++#define L4STAT_EVENT_AHB_SEQ_TRANSFERS 0x43
++#define L4STAT_EVENT_AHB_READ_ACCESSES 0x44
++#define L4STAT_EVENT_AHB_WRITE_ACCESSES 0x45
++#define L4STAT_EVENT_AHB_BYTE_ACCESSES 0x46
++#define L4STAT_EVENT_AHB_HALF_WORD_ACCESSES 0x47
++#define L4STAT_EVENT_AHB_WORD_ACCESSES 0x48
++#define L4STAT_EVENT_AHB_DOUBLE_WORD_ACCESSES 0x49
++#define L4STAT_EVENT_AHB_QUAD_WORD_ACCESSES 0x4a
++#define L4STAT_EVENT_AHB_EIGHT_WORD_ACCESSES 0x4b
++#define L4STAT_EVENT_AHB_WAITSTATES 0x4c
++#define L4STAT_EVENT_AHB_RETRY_RESPONSES 0x4d
++#define L4STAT_EVENT_AHB_SPLIT_RESPONSES 0x4e
++#define L4STAT_EVENT_AHB_SPLIT_DELAY 0x4f
++#define L4STAT_EVENT_AHB_BUS_LOCKED 0x50
++
++/*
++ * L4STAT device specific events
++ * (may be marked as user defined events in generic software drivers)
++ */
++#define L4STAT_EVENT_L2CACHE_HIT 0x60
++#define L4STAT_EVENT_L2CACHE_MISS 0x61
++#define L4STAT_EVENT_L2CACHE_BUS_ACCESS 0x62
++#define L4STAT_EVENT_L2CACHE_TAG_CORRECTABLE_ERROR 0x63
++#define L4STAT_EVENT_L2CACHE_TAG_UNCORRECTABLE_ERROR 0x64
++#define L4STAT_EVENT_L2CACHE_DATA_CORRECTABLE_ERROR 0x65
++#define L4STAT_EVENT_L2CACHE_DATA_UNCORRECTABLE_ERROR 0x66
++#define L4STAT_EVENT_IOMMU_CACHE_LOOKUP 0x67
++#define L4STAT_EVENT_IOMMU_TABLE_WALK 0x68
++#define L4STAT_EVENT_IOMMU_ACCESS_DENIED 0x69
++#define L4STAT_EVENT_IOMMU_ACCESS_OK 0x6a
++#define L4STAT_EVENT_IOMMU_ACCESS_PASSTHROUGH 0x6b
++#define L4STAT_EVENT_IOMMU_CACHE_TLB_MISS 0x6c
++#define L4STAT_EVENT_IOMMU_CACHE_TLB_HIT 0x6d
++#define L4STAT_EVENT_IOMMU_CACHE_TLB_PARITY_ERROR 0x6e
++
++/*
++ * L4STAT AHB events
++ * (only available if core is connected to a standalone AHB trace buffer):
++ */
++#define L4STAT_EVENT_AHB_TRACE_IDLE_CYCLES 0x70
++#define L4STAT_EVENT_AHB_TRACE_BUSY_CYCLES 0x71
++#define L4STAT_EVENT_AHB_TRACE_NON_SEQ_TRANSFERS 0x72
++#define L4STAT_EVENT_AHB_TRACE_SEQ_TRANSFERS 0x73
++#define L4STAT_EVENT_AHB_TRACE_READ_ACCESSES 0x74
++#define L4STAT_EVENT_AHB_TRACE_WRITE_ACCESSES 0x75
++#define L4STAT_EVENT_AHB_TRACE_BYTE_ACCESSES 0x76
++#define L4STAT_EVENT_AHB_TRACE_HALF_WORD_ACCESSES 0x77
++#define L4STAT_EVENT_AHB_TRACE_WORD_ACCESSES 0x78
++#define L4STAT_EVENT_AHB_TRACE_DOUBLE_WORD_ACCESSES 0x79
++#define L4STAT_EVENT_AHB_TRACE_QUAD_WORD_ACCESSES 0x7a
++#define L4STAT_EVENT_AHB_TRACE_EIGHT_WORD_ACCESSES 0x7b
++#define L4STAT_EVENT_AHB_TRACE_WAITSTATES 0x7c
++#define L4STAT_EVENT_AHB_TRACE_RETRY_RESPONSES 0x7d
++#define L4STAT_EVENT_AHB_TRACE_SPLIT_RESPONSES 0x7e
++#define L4STAT_EVENT_AHB_TRACE_SPLIT_DELAY 0x7f
++
++/*
++ * L4STAT events generated from REQ/GNT signals
++ */
++# define L4STAT_EVENT_REQ_GNT_AHBM0_PROC 0x80
++# define L4STAT_EVENT_REQ_GNT_AHBM1_PROC 0x81
++# define L4STAT_EVENT_REQ_GNT_AHBM2_PROC 0x82
++# define L4STAT_EVENT_REQ_GNT_AHBM3_PROC 0x83
++# define L4STAT_EVENT_REQ_GNT_AHBM4_PROC 0x84
++# define L4STAT_EVENT_REQ_GNT_AHBM5_PROC 0x85
++# define L4STAT_EVENT_REQ_GNT_AHBM0_MEM 0x86
++# define L4STAT_EVENT_REQ_GNT_AHBM1_MEM 0x87
++# define L4STAT_EVENT_REQ_GNT_AHBM2_MEM 0x88
++# define L4STAT_EVENT_REQ_AHBM0_PROC 0x90
++# define L4STAT_EVENT_REQ_AHBM1_PROC 0x91
++# define L4STAT_EVENT_REQ_AHBM2_PROC 0x92
++# define L4STAT_EVENT_REQ_AHBM3_PROC 0x93
++# define L4STAT_EVENT_REQ_AHBM4_PROC 0x94
++# define L4STAT_EVENT_REQ_AHBM5_PROC 0x95
++# define L4STAT_EVENT_REQ_AHBM0_MEM 0x96
++# define L4STAT_EVENT_REQ_AHBM1_MEM 0x97
++# define L4STAT_EVENT_REQ_AHBM2_MEM 0x98
++
++#define to_l4stat_pmu(c) (container_of(c, struct l4stat_pmu, pmu))
++
++ssize_t l4stat_format_sysfs_show(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ struct dev_ext_attribute *eattr;
++
++ eattr = container_of(attr, struct dev_ext_attribute, attr);
++
++ return sprintf(buf, "%s\n", (char *)eattr->var);
++}
++
++ssize_t l4stat_event_sysfs_show(struct device *dev,
++ struct device_attribute *attr, char *page)
++{
++ struct dev_ext_attribute *eattr;
++
++ eattr = container_of(attr, struct dev_ext_attribute, attr);
++
++ return sprintf(page, "config=0x%lx\n",
++ (unsigned long)eattr->var);
++}
++
++#define L4STAT_ATTR(_name, _func, _config) \
++ (&((struct dev_ext_attribute[]) { \
++ { __ATTR(_name, 0444, _func, NULL), (void *)_config } \
++ })[0].attr.attr)
++
++#define L4STAT_FORMAT_ATTR(_name, _config) \
++ L4STAT_ATTR(_name, l4stat_format_sysfs_show, (void *)_config)
++
++#define L4STAT_EVENT_ATTR(_name, _config) \
++ L4STAT_ATTR(_name, l4stat_event_sysfs_show, (unsigned long)_config)
++
++/*
++ * Kernel PMU event attributes
++ */
++static struct attribute *l4stat_pmu_events[] = {
++ /*
++ * Processor events
++ */
++ L4STAT_EVENT_ATTR(proc_icache_miss, L4STAT_EVENT_ICACHE_MISS),
++ L4STAT_EVENT_ATTR(proc_immu_tlb_miss, L4STAT_EVENT_IMMU_TLB_MISS),
++ L4STAT_EVENT_ATTR(proc_icache_hold, L4STAT_EVENT_ICACHE_HOLD),
++ L4STAT_EVENT_ATTR(proc_immu_hold, L4STAT_EVENT_IMMU_HOLD),
++ L4STAT_EVENT_ATTR(proc_dcache_miss, L4STAT_EVENT_DCACHE_MISS),
++ L4STAT_EVENT_ATTR(proc_dmmu_tlb_miss, L4STAT_EVENT_DMMU_TLB_MISS),
++ L4STAT_EVENT_ATTR(proc_dcache_hold, L4STAT_EVENT_DCACHE_HOLD),
++ L4STAT_EVENT_ATTR(proc_dmmu_hold, L4STAT_EVENT_DMMU_HOLD),
++ L4STAT_EVENT_ATTR(proc_data_write_buffer_hold,
++ L4STAT_EVENT_DATA_WRITE_BUFFER_HOLD),
++ L4STAT_EVENT_ATTR(proc_total_instructions,
++ L4STAT_EVENT_TOTAL_INSTRUCTIONS),
++ L4STAT_EVENT_ATTR(proc_int_instructions, L4STAT_EVENT_INT_INSTRUCTIONS),
++ L4STAT_EVENT_ATTR(proc_fpu_instructions, L4STAT_EVENT_FPU_INSTRUCTIONS),
++ L4STAT_EVENT_ATTR(proc_branch_prediction_miss,
++ L4STAT_EVENT_BRANCH_PREDICTION_MISS),
++ L4STAT_EVENT_ATTR(proc_execution_time, L4STAT_EVENT_EXECUTION_TIME),
++ L4STAT_EVENT_ATTR(proc_ahb_utilization, L4STAT_EVENT_AHB_UTILIZATION),
++ L4STAT_EVENT_ATTR(proc_ahb_total_utilization,
++ L4STAT_EVENT_AHB_TOTAL_UTILIZATION),
++ L4STAT_EVENT_ATTR(proc_int_branches, L4STAT_EVENT_INT_BRANCHES),
++ L4STAT_EVENT_ATTR(proc_call_instructions,
++ L4STAT_EVENT_CALL_INSTRUCTIONS),
++ L4STAT_EVENT_ATTR(proc_regular_type2_instructions,
++ L4STAT_EVENT_REGULAR_TYPE2_INSTRUCTIONS),
++ L4STAT_EVENT_ATTR(proc_load_and_store_instructions,
++ L4STAT_EVENT_LOAD_AND_STORE_INSTRUCTIONS),
++ L4STAT_EVENT_ATTR(proc_load_instructions,
++ L4STAT_EVENT_LOAD_INSTRUCTIONS),
++ L4STAT_EVENT_ATTR(proc_store_instructions,
++ L4STAT_EVENT_STORE_INSTRUCTIONS),
++ /*
++ * AHB Events (LEON4 Debug Support Unit)
++ */
++ L4STAT_EVENT_ATTR(ahb_busy_cycles, L4STAT_EVENT_AHB_BUSY_CYCLES),
++ L4STAT_EVENT_ATTR(ahb_non_seq_transfers,
++ L4STAT_EVENT_AHB_NON_SEQ_TRANSFERS),
++ L4STAT_EVENT_ATTR(ahb_seq_transfers, L4STAT_EVENT_AHB_SEQ_TRANSFERS),
++ L4STAT_EVENT_ATTR(ahb_idle_cycles, L4STAT_EVENT_AHB_IDLE_CYCLES),
++ L4STAT_EVENT_ATTR(ahb_read_accesses, L4STAT_EVENT_AHB_READ_ACCESSES),
++ L4STAT_EVENT_ATTR(ahb_write_accesses, L4STAT_EVENT_AHB_WRITE_ACCESSES),
++ L4STAT_EVENT_ATTR(ahb_byte_accesses, L4STAT_EVENT_AHB_BYTE_ACCESSES),
++ L4STAT_EVENT_ATTR(ahb_half_word_accesses,
++ L4STAT_EVENT_AHB_HALF_WORD_ACCESSES),
++ L4STAT_EVENT_ATTR(ahb_word_accesses, L4STAT_EVENT_AHB_WORD_ACCESSES),
++ L4STAT_EVENT_ATTR(ahb_double_word_accesses,
++ L4STAT_EVENT_AHB_DOUBLE_WORD_ACCESSES),
++ L4STAT_EVENT_ATTR(ahb_quad_word_accesses,
++ L4STAT_EVENT_AHB_QUAD_WORD_ACCESSES),
++ L4STAT_EVENT_ATTR(ahb_eight_word_accesses,
++ L4STAT_EVENT_AHB_EIGHT_WORD_ACCESSES),
++ L4STAT_EVENT_ATTR(ahb_waitstates, L4STAT_EVENT_AHB_WAITSTATES),
++ L4STAT_EVENT_ATTR(ahb_retry_responses,
++ L4STAT_EVENT_AHB_RETRY_RESPONSES),
++ L4STAT_EVENT_ATTR(ahb_split_responses,
++ L4STAT_EVENT_AHB_SPLIT_RESPONSES),
++ L4STAT_EVENT_ATTR(ahb_split_delay, L4STAT_EVENT_AHB_SPLIT_DELAY),
++ L4STAT_EVENT_ATTR(ahb_bus_locked, L4STAT_EVENT_AHB_BUS_LOCKED),
++ /*
++ * Device specific events
++ */
++ L4STAT_EVENT_ATTR(ext_l2cache_hit, L4STAT_EVENT_L2CACHE_HIT),
++ L4STAT_EVENT_ATTR(ext_l2cache_miss, L4STAT_EVENT_L2CACHE_MISS),
++ L4STAT_EVENT_ATTR(ext_l2cache_bus_access,
++ L4STAT_EVENT_L2CACHE_BUS_ACCESS),
++ L4STAT_EVENT_ATTR(ext_l2cache_tag_correctable_error,
++ L4STAT_EVENT_L2CACHE_TAG_CORRECTABLE_ERROR),
++ L4STAT_EVENT_ATTR(ext_l2cache_tag_uncorrectable_error,
++ L4STAT_EVENT_L2CACHE_TAG_UNCORRECTABLE_ERROR),
++ L4STAT_EVENT_ATTR(ext_l2cache_data_correctable_error,
++ L4STAT_EVENT_L2CACHE_DATA_CORRECTABLE_ERROR),
++ L4STAT_EVENT_ATTR(ext_l2cache_data_uncorrectable_error,
++ L4STAT_EVENT_L2CACHE_DATA_UNCORRECTABLE_ERROR),
++ L4STAT_EVENT_ATTR(ext_iommu_cache_lookup,
++ L4STAT_EVENT_IOMMU_CACHE_LOOKUP),
++ L4STAT_EVENT_ATTR(ext_iommu_table_walk, L4STAT_EVENT_IOMMU_TABLE_WALK),
++ L4STAT_EVENT_ATTR(ext_iommu_access_denied,
++ L4STAT_EVENT_IOMMU_ACCESS_DENIED),
++ L4STAT_EVENT_ATTR(ext_iommu_access_ok, L4STAT_EVENT_IOMMU_ACCESS_OK),
++ L4STAT_EVENT_ATTR(ext_iommu_access_passthrough,
++ L4STAT_EVENT_IOMMU_ACCESS_PASSTHROUGH),
++ L4STAT_EVENT_ATTR(ext_iommu_cache_tlb_miss,
++ L4STAT_EVENT_IOMMU_CACHE_TLB_MISS),
++ L4STAT_EVENT_ATTR(ext_iommu_cache_tlb_hit,
++ L4STAT_EVENT_IOMMU_CACHE_TLB_HIT),
++ L4STAT_EVENT_ATTR(ext_iommu_cache_tlb_parity_error,
++ L4STAT_EVENT_IOMMU_CACHE_TLB_PARITY_ERROR),
++ /*
++ * AHB events (standalone AHB trace buffer)
++ */
++ L4STAT_EVENT_ATTR(ahbtrace_busy_cycles,
++ L4STAT_EVENT_AHB_TRACE_BUSY_CYCLES),
++ L4STAT_EVENT_ATTR(ahbtrace_non_seq_transfers,
++ L4STAT_EVENT_AHB_TRACE_NON_SEQ_TRANSFERS),
++ L4STAT_EVENT_ATTR(ahbtrace_seq_transfers,
++ L4STAT_EVENT_AHB_TRACE_SEQ_TRANSFERS),
++ L4STAT_EVENT_ATTR(ahbtrace_idle_cycles,
++ L4STAT_EVENT_AHB_TRACE_IDLE_CYCLES),
++ L4STAT_EVENT_ATTR(ahbtrace_read_accesses,
++ L4STAT_EVENT_AHB_TRACE_READ_ACCESSES),
++ L4STAT_EVENT_ATTR(ahbtrace_write_accesses,
++ L4STAT_EVENT_AHB_TRACE_WRITE_ACCESSES),
++ L4STAT_EVENT_ATTR(ahbtrace_byte_accesses,
++ L4STAT_EVENT_AHB_TRACE_BYTE_ACCESSES),
++ L4STAT_EVENT_ATTR(ahbtrace_half_word_accesses,
++ L4STAT_EVENT_AHB_TRACE_HALF_WORD_ACCESSES),
++ L4STAT_EVENT_ATTR(ahbtrace_word_accesses,
++ L4STAT_EVENT_AHB_TRACE_WORD_ACCESSES),
++ L4STAT_EVENT_ATTR(ahbtrace_double_word_accesses,
++ L4STAT_EVENT_AHB_TRACE_DOUBLE_WORD_ACCESSES),
++ L4STAT_EVENT_ATTR(ahbtrace_quad_word_accesses,
++ L4STAT_EVENT_AHB_TRACE_QUAD_WORD_ACCESSES),
++ L4STAT_EVENT_ATTR(ahbtrace_eight_word_accesses,
++ L4STAT_EVENT_AHB_TRACE_EIGHT_WORD_ACCESSES),
++ L4STAT_EVENT_ATTR(ahbtrace_waitstates,
++ L4STAT_EVENT_AHB_TRACE_WAITSTATES),
++ L4STAT_EVENT_ATTR(ahbtrace_retry_responses,
++ L4STAT_EVENT_AHB_TRACE_RETRY_RESPONSES),
++ L4STAT_EVENT_ATTR(ahbtrace_split_responses,
++ L4STAT_EVENT_AHB_TRACE_SPLIT_RESPONSES),
++ L4STAT_EVENT_ATTR(ahbtrace_split_delay,
++ L4STAT_EVENT_AHB_TRACE_SPLIT_DELAY),
++ /*
++ * Events generated from REQ/GNT signals
++ */
++ L4STAT_EVENT_ATTR(reqgnt_ahbm0_proc, L4STAT_EVENT_REQ_GNT_AHBM0_PROC),
++ L4STAT_EVENT_ATTR(reqgnt_ahbm1_proc, L4STAT_EVENT_REQ_GNT_AHBM1_PROC),
++ L4STAT_EVENT_ATTR(reqgnt_ahbm2_proc, L4STAT_EVENT_REQ_GNT_AHBM2_PROC),
++ L4STAT_EVENT_ATTR(reqgnt_ahbm3_proc, L4STAT_EVENT_REQ_GNT_AHBM3_PROC),
++ L4STAT_EVENT_ATTR(reqgnt_ahbm4_proc, L4STAT_EVENT_REQ_GNT_AHBM4_PROC),
++ L4STAT_EVENT_ATTR(reqgnt_ahbm5_proc, L4STAT_EVENT_REQ_GNT_AHBM5_PROC),
++ L4STAT_EVENT_ATTR(reqgnt_ahbm0_mem, L4STAT_EVENT_REQ_GNT_AHBM0_MEM),
++ L4STAT_EVENT_ATTR(reqgnt_ahbm1_mem, L4STAT_EVENT_REQ_GNT_AHBM1_MEM),
++ L4STAT_EVENT_ATTR(reqgnt_ahbm2_mem, L4STAT_EVENT_REQ_GNT_AHBM2_MEM),
++ L4STAT_EVENT_ATTR(req_ahbm0_proc, L4STAT_EVENT_REQ_AHBM0_PROC),
++ L4STAT_EVENT_ATTR(req_ahbm1_proc, L4STAT_EVENT_REQ_AHBM1_PROC),
++ L4STAT_EVENT_ATTR(req_ahbm2_proc, L4STAT_EVENT_REQ_AHBM2_PROC),
++ L4STAT_EVENT_ATTR(req_ahbm3_proc, L4STAT_EVENT_REQ_AHBM3_PROC),
++ L4STAT_EVENT_ATTR(req_ahbm4_proc, L4STAT_EVENT_REQ_AHBM4_PROC),
++ L4STAT_EVENT_ATTR(req_ahbm5_proc, L4STAT_EVENT_REQ_AHBM5_PROC),
++ L4STAT_EVENT_ATTR(req_ahbm0_mem, L4STAT_EVENT_REQ_AHBM0_MEM),
++ L4STAT_EVENT_ATTR(req_ahbm1_mem, L4STAT_EVENT_REQ_AHBM1_MEM),
++ L4STAT_EVENT_ATTR(req_ahbm2_mem, L4STAT_EVENT_REQ_AHBM2_MEM),
++ NULL,
++};
++
++static struct attribute *l4stat_pmu_format[] = {
++ L4STAT_FORMAT_ATTR(event, "config:0-7"), // EID is 8 bits
++ L4STAT_FORMAT_ATTR(ahbm, "config1:0-3"), // AHBM is 4 bits
++ L4STAT_FORMAT_ATTR(su, "config2:0-1"), // SU is 2 bits
++ NULL,
++};
++
++static const struct attribute_group l4stat_pmu_format_group = {
++ .name = "format",
++ .attrs = l4stat_pmu_format,
++};
++
++static const struct attribute_group l4stat_pmu_events_group = {
++ .name = "events",
++ .attrs = l4stat_pmu_events,
++};
++
++static const struct attribute_group *l4stat_pmu_attr_groups[] = {
++ &l4stat_pmu_format_group,
++ &l4stat_pmu_events_group,
++ NULL,
++};
++
++struct l4stat_pmu_hw_events {
++ struct perf_event **events;
++ unsigned long *used_mask;
++};
++
++struct l4stat_pmu {
++ void __iomem *regs;
++ struct pmu pmu;
++ struct l4stat_pmu_hw_events hw_events;
++ struct platform_device *platform_dev;
++};
++
++static inline u32 l4stat_pmu_read_register(struct l4stat_pmu *l4stat_pmu,
++ int idx, unsigned int reg)
++{
++ unsigned int offset = APB_ADDR_OFFSET(idx) + reg;
++
++ return ioread32be(l4stat_pmu->regs + offset);
++}
++
++static inline void l4stat_pmu_write_register(struct l4stat_pmu *l4stat_pmu,
++ u32 val, int idx, unsigned int reg)
++{
++ int offset = APB_ADDR_OFFSET(idx) + reg;
++
++ iowrite32be(val, l4stat_pmu->regs + offset);
++}
++
++/*
++ * Return the current 32-bit value of the hardware counter assigned to @event.
++ *
++ * The counter is selected by event->hw.idx and read at register offset
++ * L4STAT_CVAL.
++ */
++static u32 l4stat_pmu_read_counter(struct perf_event *event)
++{
++	struct l4stat_pmu *l4stat = to_l4stat_pmu(event->pmu);
++
++	return l4stat_pmu_read_register(l4stat, event->hw.idx, L4STAT_CVAL);
++}
++
++static void l4stat_pmu_clear_counter(struct l4stat_pmu *l4stat_pmu, int idx)
++{
++ l4stat_pmu_write_register(l4stat_pmu, 0, idx, L4STAT_CVAL);
++}
++
++/* Claim the lowest free counter in hw->used_mask; -EAGAIN if all are busy. */
++static int l4stat_get_event_idx(struct l4stat_pmu_hw_events *hw)
++{
++	int cntr = 0;
++
++	while (cntr < L4STAT_NUM_CNTRS) {
++		if (!test_and_set_bit(cntr, hw->used_mask))
++			return cntr;
++		cntr++;
++	}
++	return -EAGAIN;
++}
++
++static int l4stat_map_raw_event(u64 config)
++{
++	/* The raw config value is used as the event ID, narrowed to int. */
++	return (int)config;
++}
++
++/* Map @event to a hardware event ID, or -ENOENT if it is not of our type. */
++int l4stat_map_event(struct perf_event *event)
++{
++	const struct perf_event_attr *attr = &event->attr;
++
++	if ((int)attr->type != event->pmu->type)
++		return -ENOENT;
++
++	return l4stat_map_raw_event(attr->config);
++}
++
++/* Validate @event and set up its hw state; no counter is claimed here. */
++static int l4stat_pmu_event_init(struct perf_event *event)
++{
++	struct hw_perf_event *hwc = &event->hw;
++	int mapping;
++
++	/*
++	 * Reject values wider than the EID (8 bit), CPU/AHBM (4 bit) and SU
++	 * (2 bit) fields; l4stat_pmu_start() shifts them into CCTRL unmasked.
++	 */
++	if (event->attr.type == event->pmu->type &&
++	    (event->attr.config > 0xff || event->attr.config1 > 0xf ||
++	     event->attr.config2 > 0x3))
++		return -EINVAL;
++
++	mapping = l4stat_map_event(event);
++	if (mapping < 0) {
++		pr_debug("Invalid event %x:%llx\n", event->attr.type,
++			 event->attr.config);
++		return mapping;
++	}
++
++	/* idx == -1: the event has not been placed on a counter yet. */
++	hwc->idx = -1;
++	hwc->config = 0;
++	hwc->event_base = 0;
++	hwc->config_base = (unsigned long)mapping;
++
++	return 0;
++}
++
++/*
++ * perf start callback: build the control word for the counter assigned to
++ * @event and write it, with CCTRL_EN set, to that counter's L4STAT_CCTRL.
++ *
++ * attr.config supplies the event id, attr.config2 and the exclude_user/
++ * exclude_kernel bits the SU field, and event->oncpu or attr.config1 the
++ * CPU/AHBM field. @pmu_flags is not used.
++ */
++static void l4stat_pmu_start(struct perf_event *event, int pmu_flags)
++{
++	struct l4stat_pmu *l4stat_pmu = to_l4stat_pmu(event->pmu);
++	struct hw_perf_event *hwc = &event->hw;
++	u64 config = event->attr.config;
++	int idx = hwc->idx;
++	bool proc_event;
++	u32 ctrl = 0;
++
++	hwc->state = 0;
++
++	if (event->attr.exclude_user)
++		ctrl |= CCTRL_SU_KERNEL;
++	else if (event->attr.exclude_kernel)
++		ctrl |= CCTRL_SU_USER;
++
++	ctrl |= event->attr.config2 << CCTRL_SU_BIT;
++	ctrl |= config << CCTRL_EVENTID_BIT;
++
++	/* Processor events (except processor AHB events) */
++	proc_event = (config >= PROC_EVENT_RANGE_START &&
++		      config < L4STAT_EVENT_AHB_UTILIZATION) ||
++		     (config > L4STAT_EVENT_AHB_TOTAL_UTILIZATION &&
++		      config <= PROC_EVENT_RANGE_END);
++
++	if (proc_event || event->attr.config2 == SU_OPT1) {
++		/* CPU AHBM: use event->oncpu. Non-CPU AHBM: use config1. */
++		if (event->attr.config1 <= GR740_MAX_CPUID)
++			ctrl |= event->oncpu << CCTRL_CPUAHBM_BIT;
++		else
++			ctrl |= event->attr.config1 << CCTRL_CPUAHBM_BIT;
++	} else if (event->attr.config2 == SU_OPT2) {
++		/* Remaining events with SU_OPT2: filter on any AHBM */
++		ctrl |= event->attr.config1 << CCTRL_CPUAHBM_BIT;
++	}
++
++	ctrl |= CCTRL_EN;
++
++	l4stat_pmu_write_register(l4stat_pmu, ctrl, idx, L4STAT_CCTRL);
++}
++
++/* perf add callback: claim a free counter for @event, optionally start it. */
++static int l4stat_pmu_add(struct perf_event *event, int flags)
++{
++	struct l4stat_pmu *l4stat_pmu = to_l4stat_pmu(event->pmu);
++	struct l4stat_pmu_hw_events *hw = &l4stat_pmu->hw_events;
++	int cntr = l4stat_get_event_idx(hw);
++
++	/* No free counter: hand the allocator's error back to the caller. */
++	if (cntr < 0)
++		return cntr;
++
++	/* Tie the event and the claimed counter to each other. */
++	event->hw.idx = cntr;
++	hw->events[cntr] = event;
++
++	/* Leave the counter stopped unless a start was requested. */
++	event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
++	if (flags & PERF_EF_START)
++		l4stat_pmu_start(event, PERF_EF_RELOAD);
++
++	perf_event_update_userpage(event);
++
++	return 0;
++}
++
++/* Stop counter @idx by clearing CCTRL_EN; other control bits are kept. */
++static void l4stat_pmu_disable_counter(struct l4stat_pmu *l4stat_pmu, int idx)
++{
++	u32 cctrl = l4stat_pmu_read_register(l4stat_pmu, idx, L4STAT_CCTRL);
++
++	cctrl &= ~(CCTRL_EN);
++	l4stat_pmu_write_register(l4stat_pmu, cctrl, idx, L4STAT_CCTRL);
++}
++
++/* Fold the current hardware counter value of @event into event->count. */
++static u32 l4stat_pmu_event_update(struct perf_event *event)
++{
++	struct hw_perf_event *hwc = &event->hw;
++	u64 prev, raw;
++
++	do {
++		prev = local64_read(&hwc->prev_count);
++		raw = l4stat_pmu_read_counter(event);
++	} while (local64_cmpxchg(&hwc->prev_count, prev, raw) != prev);
++
++	/* The raw value is added, not the difference to prev_count. */
++	local64_add(raw, &event->count);
++	return raw;
++}
++
++/* perf stop callback: disable the counter and fold its value into @event. */
++static void l4stat_pmu_stop(struct perf_event *event, int pmu_flags)
++{
++	struct l4stat_pmu *l4stat_pmu = to_l4stat_pmu(event->pmu);
++	struct hw_perf_event *hwc = &event->hw;
++	int idx = hwc->idx;
++
++	if (hwc->state & PERF_HES_STOPPED)
++		return;
++
++	l4stat_pmu_disable_counter(l4stat_pmu, idx);
++
++	/* Account the raw value, then zero the counter for the next run */
++	l4stat_pmu_event_update(event);
++	l4stat_pmu_clear_counter(l4stat_pmu, idx);
++
++	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
++}
++
++/* perf del callback: stop @event and give its counter back to the pool. */
++static void l4stat_pmu_del(struct perf_event *event, int flags)
++{
++	struct l4stat_pmu *l4stat_pmu = to_l4stat_pmu(event->pmu);
++	struct l4stat_pmu_hw_events *hw = &l4stat_pmu->hw_events;
++	int cntr = event->hw.idx;
++
++	l4stat_pmu_stop(event, PERF_EF_UPDATE);
++	hw->events[cntr] = NULL;
++	clear_bit(cntr, hw->used_mask);
++
++	perf_event_update_userpage(event);
++}
++
++/* Fill in the struct pmu callbacks and register the PMU with perf. */
++static int l4stat_pmu_init(struct l4stat_pmu *l4stat_pmu,
++			   struct platform_device *pdev)
++{
++	/* NOTE(review): no .read callback is set below - confirm that is safe. */
++	l4stat_pmu->pmu = (struct pmu){
++		.module = THIS_MODULE,
++		.name = DRV_NAME,
++		.task_ctx_nr = perf_hw_context,
++		.event_init = l4stat_pmu_event_init,
++		.add = l4stat_pmu_add,
++		.del = l4stat_pmu_del,
++		.start = l4stat_pmu_start,
++		.stop = l4stat_pmu_stop,
++		.attr_groups = l4stat_pmu_attr_groups,
++		.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
++	};
++	l4stat_pmu->platform_dev = pdev;
++	platform_set_drvdata(pdev, l4stat_pmu);
++
++	return perf_pmu_register(&l4stat_pmu->pmu, DRV_NAME, -1);
++}
++
++/* Allocate the driver state and its per-counter arrays with devm_*(). */
++static struct l4stat_pmu *l4stat_pmu_alloc(struct device *dev)
++{
++	struct l4stat_pmu_hw_events *hw;
++	struct l4stat_pmu *l4s;
++
++	l4s = devm_kzalloc(dev, sizeof(*l4s), GFP_KERNEL);
++	if (!l4s)
++		return ERR_PTR(-ENOMEM);
++	hw = &l4s->hw_events;
++
++	hw->events = devm_kcalloc(dev, L4STAT_NUM_CNTRS, sizeof(*hw->events),
++				  GFP_KERNEL);
++	if (!hw->events)
++		return ERR_PTR(-ENOMEM);
++
++	hw->used_mask = devm_kcalloc(dev, BITS_TO_LONGS(L4STAT_NUM_CNTRS),
++				     sizeof(*hw->used_mask), GFP_KERNEL);
++	if (!hw->used_mask)
++		return ERR_PTR(-ENOMEM);
++
++	return l4s;
++}
++
++/*
++ * Platform probe: allocate the driver state, map register resource 0 and
++ * register the PMU. Returns 0 on success or the error of the failing step.
++ */
++static int l4stat_pmu_probe(struct platform_device *pdev)
++{
++	struct l4stat_pmu *l4stat_pmu = l4stat_pmu_alloc(&pdev->dev);
++	int err;
++
++	/* An allocation failure is returned without the message below. */
++	if (IS_ERR(l4stat_pmu))
++		return PTR_ERR(l4stat_pmu);
++
++	l4stat_pmu->regs = devm_platform_ioremap_resource(pdev, 0);
++	if (IS_ERR(l4stat_pmu->regs))
++		err = PTR_ERR(l4stat_pmu->regs);
++	else
++		err = l4stat_pmu_init(l4stat_pmu, pdev);
++
++	if (!err)
++		return 0;
++
++	dev_err(&pdev->dev, "%s driver initialization failed with error %d\n",
++		DRV_NAME, err);
++	return err;
++}
++
++static int l4stat_pmu_remove(struct platform_device *pdev)
++{
++ struct l4stat_pmu *l4stat_pmu = platform_get_drvdata(pdev); /* stored by l4stat_pmu_init() */
++
++ perf_pmu_unregister(&l4stat_pmu->pmu); /* undoes the perf_pmu_register() done in l4stat_pmu_init() */
++
++ return 0;
++}
++
++static const struct of_device_id l4stat_match[] = {
++ { .name = "GAISLER_L4STAT" },
++ { .name = "01_047" }, /* NOTE(review): looks like a numeric alias for the same core - confirm */
++ {}, /* empty terminating entry */
++};
++
++MODULE_DEVICE_TABLE(of, l4stat_match);
++
++static struct platform_driver l4stat_pmu_driver = {
++ .driver = {
++ .name = DRV_NAME,
++ .of_match_table = l4stat_match, /* entries in this table are keyed by .name */
++ },
++ .probe = l4stat_pmu_probe,
++ .remove = l4stat_pmu_remove,
++};
++
++module_platform_driver(l4stat_pmu_driver);
+--
+2.34.1
+
--- /dev/null
+From 61d9814f8215e73df88b39ccdf1515c688e62395 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Thu, 23 Feb 2023 08:12:56 +0100
+Subject: [PATCH 30/32] sparc32,leon: Call install_leon_pmc early to enable
+ power down early
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/kernel/leon_pmc.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/arch/sparc/kernel/leon_pmc.c b/arch/sparc/kernel/leon_pmc.c
+index 396f46bca52e..338edf4e2f80 100644
+--- a/arch/sparc/kernel/leon_pmc.c
++++ b/arch/sparc/kernel/leon_pmc.c
+@@ -88,7 +88,7 @@ static int __init leon_pmc_install(void)
+ return 0;
+ }
+
+-/* This driver is not critical to the boot process, don't care
+- * if initialized late.
++/* This driver is not critical to the boot process, but it can speed up
++ * simulation when in place early.
+ */
+-late_initcall(leon_pmc_install);
++early_initcall(leon_pmc_install);
+--
+2.34.1
+
--- /dev/null
+From 315becbd20f50b842c47b4438c884cec5b17f50e Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Fri, 24 Feb 2023 07:50:46 +0100
+Subject: [PATCH 31/32] sparc32,leon: Use banner-name property when available
+ when printing system type
+
+Signed-off-by: Andreas Larsson <andreas@gaisler.com>
+---
+ arch/sparc/include/asm/machines.h | 1 +
+ arch/sparc/kernel/idprom.c | 41 ++++++++++++++++++++++++-------
+ 2 files changed, 33 insertions(+), 9 deletions(-)
+
+diff --git a/arch/sparc/include/asm/machines.h b/arch/sparc/include/asm/machines.h
+index 9f78f70c6f11..69d4450440ce 100644
+--- a/arch/sparc/include/asm/machines.h
++++ b/arch/sparc/include/asm/machines.h
+@@ -11,6 +11,7 @@
+ struct Sun_Machine_Models {
+ char *name;
+ unsigned char id_machtype;
++ int use_banner;
+ };
+
+ /* The machine type in the idprom area looks like this:
+diff --git a/arch/sparc/kernel/idprom.c b/arch/sparc/kernel/idprom.c
+index d6c46d512220..3d7b81808c0f 100644
+--- a/arch/sparc/kernel/idprom.c
++++ b/arch/sparc/kernel/idprom.c
+@@ -28,14 +28,37 @@ static struct idprom idprom_buffer;
+ * know about. See asm-sparc/machines.h for empirical constants.
+ */
+ static struct Sun_Machine_Models Sun_Machines[] = {
+-/* First, Leon */
+-{ .name = "Leon3 System-on-a-Chip", .id_machtype = (M_LEON | M_LEON3_SOC) },
+-/* Finally, early Sun4m's */
+-{ .name = "Sun4m SparcSystem600", .id_machtype = (SM_SUN4M | SM_4M_SS60) },
+-{ .name = "Sun4m SparcStation10/20", .id_machtype = (SM_SUN4M | SM_4M_SS50) },
+-{ .name = "Sun4m SparcStation5", .id_machtype = (SM_SUN4M | SM_4M_SS40) },
+-/* One entry for the OBP arch's which are sun4d, sun4e, and newer sun4m's */
+-{ .name = "Sun4M OBP based system", .id_machtype = (SM_SUN4M_OBP | 0x0) } };
++ /* First, Leon */
++ {
++ .name = "Leon3 System-on-a-Chip",
++ .id_machtype = (M_LEON | M_LEON3_SOC),
++ .use_banner = 1
++ },
++ /* Finally, early Sun4m's */
++ {
++ .name = "Sun4m SparcSystem600",
++ .id_machtype = (SM_SUN4M | SM_4M_SS60),
++ .use_banner = 0
++ },
++ {
++ .name = "Sun4m SparcStation10/20",
++ .id_machtype = (SM_SUN4M | SM_4M_SS50),
++ .use_banner = 0
++ },
++ {
++ .name = "Sun4m SparcStation5",
++ .id_machtype = (SM_SUN4M | SM_4M_SS40),
++ .use_banner = 0
++ },
++ /* One entry for the OBP arch's which are sun4d, sun4e, and newer
++ * sun4m's
++ */
++ {
++ .name = "Sun4M OBP based system",
++ .id_machtype = (SM_SUN4M_OBP | 0x0),
++ .use_banner = 1
++ }
++};
+
+ static void __init display_system_type(unsigned char machtype)
+ {
+@@ -44,7 +67,7 @@ static void __init display_system_type(unsigned char machtype)
+
+ for (i = 0; i < ARRAY_SIZE(Sun_Machines); i++) {
+ if (Sun_Machines[i].id_machtype == machtype) {
+- if (machtype != (SM_SUN4M_OBP | 0x00) ||
++ if (!Sun_Machines[i].use_banner ||
+ prom_getproperty(prom_root_node, "banner-name",
+ sysname, sizeof(sysname)) <= 0)
+ printk(KERN_WARNING "TYPE: %s\n",
+--
+2.34.1
+
--- /dev/null
+From 15fd3f122fda175dc5141c44487d46ad97d521a6 Mon Sep 17 00:00:00 2001
+From: Andreas Larsson <andreas@gaisler.com>
+Date: Thu, 6 Apr 2023 14:04:51 +0200
+Subject: [PATCH 32/32] Set version suffix for LEON Linux 5.10-1.4 release
+
+---
+ localversion-leon | 1 +
+ 1 file changed, 1 insertion(+)
+ create mode 100644 localversion-leon
+
+diff --git a/localversion-leon b/localversion-leon
+new file mode 100644
+index 000000000000..ca33e31621b8
+--- /dev/null
++++ b/localversion-leon
+@@ -0,0 +1 @@
++-leon1.4
+--
+2.34.1
+