The order in __tlb_flush_mm_lazy is to flush TLB first and then clear
the mm->context.flush_mm bit. This can lead to missed flushes as the
bit can be set anytime, the order needs to be the other way aronud.
But this leads to a different race, __tlb_flush_mm_lazy may be called
on two CPUs concurrently. If mm->context.flush_mm is cleared first then
another CPU can bypass __tlb_flush_mm_lazy although the first CPU has
not done the flush yet. In a virtualized environment the time until the
flush is finally completed can be arbitrarily long.
Add a spinlock to serialize __tlb_flush_mm_lazy and use the function
in finish_arch_post_lock_switch as well.
Cc: <stable@vger.kernel.org>
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
#include <linux/errno.h>
typedef struct {
+ spinlock_t lock;
cpumask_t cpu_attach_mask;
atomic_t flush_count;
unsigned int flush_mm;
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
+ .context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock), \
.context.pgtable_lock = \
__SPIN_LOCK_UNLOCKED(name.context.pgtable_lock), \
.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
+ spin_lock_init(&mm->context.lock);
spin_lock_init(&mm->context.pgtable_lock);
INIT_LIST_HEAD(&mm->context.pgtable_list);
spin_lock_init(&mm->context.gmap_lock);
while (atomic_read(&mm->context.flush_count))
cpu_relax();
cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
- if (mm->context.flush_mm)
- __tlb_flush_mm(mm);
+ __tlb_flush_mm_lazy(mm);
preempt_enable();
}
set_fs(current->thread.mm_segment);
static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
{
+ spin_lock(&mm->context.lock);
if (mm->context.flush_mm) {
- __tlb_flush_mm(mm);
mm->context.flush_mm = 0;
+ __tlb_flush_mm(mm);
}
+ spin_unlock(&mm->context.lock);
}
/*