set_bit(X86_FEATURE_K7, c->x86_capability);
break;
}
+ if (c->x86 >= 6)
+ set_bit(X86_FEATURE_FXSAVE_LEAK, c->x86_capability);
display_cacheinfo(c);
prev->userrsp = read_pda(oldrsp);
write_pda(oldrsp, next->userrsp);
write_pda(pcurrent, next_p);
+
/* This must be here to ensure both math_state_restore() and
- kernel_fpu_begin() work consistently. */
+ kernel_fpu_begin() work consistently.
+ And the AMD workaround requires it to be after DS reload. */
unlazy_fpu(prev_p);
write_pda(kernelstack,
task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
+ /* Enable workaround for FXSAVE leak */
+ if (c->x86 >= 6)
+ set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
+
r = get_model_name(c);
if (!r) {
switch (c->x86) {
#define X86_FEATURE_P4 (3*32+ 7) /* P4 */
#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */
+#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
#include <linux/sched.h>
#include <linux/init.h>
+#include <linux/kernel_stat.h>
#include <asm/processor.h>
#include <asm/sigcontext.h>
#include <asm/user.h>
extern void kernel_fpu_begin(void);
#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0)
+/* We need a safe address that is cheap to find and that is already
+ in L1 during context switch. The best choices are unfortunately
+ different for UP and SMP */
+#ifdef CONFIG_SMP
+#define safe_address (__per_cpu_offset[0])
+#else
+#define safe_address (kstat_cpu(0).cpustat.user)
+#endif
+
/*
* These must be called with preempt disabled
*/
static inline void __save_init_fpu( struct task_struct *tsk )
{
+ /* Use more nops than strictly needed in case the compiler
+ varies code */
alternative_input(
- "fnsave %1 ; fwait ;" GENERIC_NOP2,
- "fxsave %1 ; fnclex",
+ "fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
+ "fxsave %[fx]\n"
+ "bt $7,%[fsw] ; jc 1f ; fnclex\n1:",
X86_FEATURE_FXSR,
- "m" (tsk->thread.i387.fxsave)
- :"memory");
+ [fx] "m" (tsk->thread.i387.fxsave),
+ [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory");
+ /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+ is pending. Clear the x87 state here by setting it to fixed
+ values. __per_cpu_offset[0] is a random variable that should be in L1 */
+ alternative_input(
+ GENERIC_NOP8 GENERIC_NOP2,
+ "emms\n\t" /* clear stack tags */
+ "fildl %[addr]", /* set F?P to defined value */
+ X86_FEATURE_FXSAVE_LEAK,
+ [addr] "m" (safe_address));
task_thread_info(tsk)->status &= ~TS_USEDFPU;
}
#define X86_FEATURE_REP_GOOD (3*32+ 4) /* rep microcode works well on this CPU */
#define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */
#define X86_FEATURE_SYNC_RDTSC (3*32+6) /* RDTSC syncs CPU core */
+#define X86_FEATURE_FXSAVE_LEAK (3*32+7) /* FIP/FOP/FDP leaks through FXSAVE */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
#define set_fpu_swd(t,val) ((t)->thread.i387.fxsave.swd = (val))
#define set_fpu_fxsr_twd(t,val) ((t)->thread.i387.fxsave.twd = (val))
+#define X87_FSW_ES (1 << 7) /* Exception Summary */
+
+/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
+ is pending. Clear the x87 state here by setting it to fixed
+ values. The kernel data segment can be sometimes 0 and sometimes
+ new user value. Both should be ok.
+ Use the PDA as safe address because it should be already in L1. */
+static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
+{
+ if (unlikely(fx->swd & X87_FSW_ES))
+ asm volatile("fnclex");
+ alternative_input(ASM_NOP8 ASM_NOP2,
+ " emms\n" /* clear stack tags */
+ " fildl %%gs:0", /* load to clear state */
+ X86_FEATURE_FXSAVE_LEAK);
+}
+
static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
{
int err;
#endif
if (unlikely(err))
__clear_user(fx, sizeof(struct i387_fxsave_struct));
+ /* No need to clear here because the caller clears USED_MATH */
return err;
}
"i" (offsetof(__typeof__(*tsk),
thread.i387.fxsave)));
#endif
- __asm__ __volatile__("fnclex");
+ clear_fpu_state(&tsk->thread.i387.fxsave);
}
static inline void kernel_fpu_begin(void)