The mmiowb() macro is horribly difficult to use and drivers will continue
to work most of the time if they omit a call when it is required.
Rather than rely on driver authors getting this right, push mmiowb() into
arch_spin_unlock() for sh. If this is deemed to be a performance issue,
a subsequent optimisation could make use of ARCH_HAS_MMIOWB to elide
the barrier in cases where no I/O writes were performed inside the
critical section.
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
-generic-y += mmiowb.h
generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
#define IO_SPACE_LIMIT 0xffffffff
-/* synco on SH-4A, otherwise a nop */
-#define mmiowb() wmb()
-
/* We really want to try and get these to memcpy etc */
void memcpy_fromio(void *, const volatile void __iomem *, unsigned long);
void memcpy_toio(volatile void __iomem *, const void *, unsigned long);
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SH_MMIOWB_H
+#define __ASM_SH_MMIOWB_H
+
+#include <asm/barrier.h>
+
+/* synco on SH-4A, otherwise a nop */
+#define mmiowb() wmb()
+
+#include <asm-generic/mmiowb.h>
+
+#endif /* __ASM_SH_MMIOWB_H */
{
unsigned long tmp;
+ /* This could be optimised with ARCH_HAS_MMIOWB */
+ mmiowb();
__asm__ __volatile__ (
"mov #1, %0 ! arch_spin_unlock \n\t"
"mov.l %0, @%1 \n\t"