--- /dev/null
+From b3b0870ef3ffed72b92415423da864f440f57ad6 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Thu, 16 Feb 2012 15:45:23 -0800
+Subject: i387: do not preload FPU state at task switch time
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit b3b0870ef3ffed72b92415423da864f440f57ad6 upstream.
+
+Yes, taking the trap to re-load the FPU/MMX state is expensive, but so
+is spending several days looking for a bug in the state save/restore
+code. And the preload code has some rather subtle interactions with
+both paravirtualization support and segment state restore, so it's not
+nearly as simple as it should be.
+
+Also, now that we no longer necessarily depend on a single bit (ie
+TS_USEDFPU) for keeping track of the state of the FPU, we might be able
+to do better. If we are really switching between two processes that
+keep touching the FP state, save/restore is inevitable, but in the case
+of having one process that does most of the FPU usage, we may actually
+be able to do much better than the preloading.
+
+In particular, we may be able to keep track of which CPU the process ran
+on last, and also per CPU keep track of which process' FP state that CPU
+has. For modern CPU's that don't destroy the FPU contents on save time,
+that would allow us to do a lazy restore by just re-enabling the
+existing FPU state - with no restore cost at all!
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/i387.h | 1 -
+ arch/x86/kernel/process_32.c | 20 --------------------
+ arch/x86/kernel/process_64.c | 23 -----------------------
+ arch/x86/kernel/traps.c | 35 +++++++++++------------------------
+ 4 files changed, 11 insertions(+), 68 deletions(-)
+
+--- a/arch/x86/include/asm/i387.h
++++ b/arch/x86/include/asm/i387.h
+@@ -30,7 +30,6 @@ extern void fpu_init(void);
+ extern void mxcsr_feature_mask_init(void);
+ extern int init_fpu(struct task_struct *child);
+ extern void math_state_restore(void);
+-extern void __math_state_restore(void);
+ extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
+
+ extern user_regset_active_fn fpregs_active, xfpregs_active;
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -293,23 +293,11 @@ __switch_to(struct task_struct *prev_p,
+ *next = &next_p->thread;
+ int cpu = smp_processor_id();
+ struct tss_struct *tss = &per_cpu(init_tss, cpu);
+- bool preload_fpu;
+
+ /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
+
+- /*
+- * If the task has used fpu the last 5 timeslices, just do a full
+- * restore of the math state immediately to avoid the trap; the
+- * chances of needing FPU soon are obviously high now
+- */
+- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+-
+ __unlazy_fpu(prev_p);
+
+- /* we're going to use this soon, after a few expensive things */
+- if (preload_fpu)
+- prefetch(next->fpu.state);
+-
+ /*
+ * Reload esp0.
+ */
+@@ -348,11 +336,6 @@ __switch_to(struct task_struct *prev_p,
+ task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
+ __switch_to_xtra(prev_p, next_p, tss);
+
+- /* If we're going to preload the fpu context, make sure clts
+- is run while we're batching the cpu state updates. */
+- if (preload_fpu)
+- clts();
+-
+ /*
+ * Leave lazy mode, flushing any hypercalls made here.
+ * This must be done before restoring TLS segments so
+@@ -362,9 +345,6 @@ __switch_to(struct task_struct *prev_p,
+ */
+ arch_end_context_switch(next_p);
+
+- if (preload_fpu)
+- __math_state_restore();
+-
+ /*
+ * Restore %gs if needed (which is common)
+ */
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -377,18 +377,6 @@ __switch_to(struct task_struct *prev_p,
+ int cpu = smp_processor_id();
+ struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ unsigned fsindex, gsindex;
+- bool preload_fpu;
+-
+- /*
+- * If the task has used fpu the last 5 timeslices, just do a full
+- * restore of the math state immediately to avoid the trap; the
+- * chances of needing FPU soon are obviously high now
+- */
+- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+-
+- /* we're going to use this soon, after a few expensive things */
+- if (preload_fpu)
+- prefetch(next->fpu.state);
+
+ /*
+ * Reload esp0, LDT and the page table pointer:
+@@ -421,10 +409,6 @@ __switch_to(struct task_struct *prev_p,
+ /* Must be after DS reload */
+ __unlazy_fpu(prev_p);
+
+- /* Make sure cpu is ready for new context */
+- if (preload_fpu)
+- clts();
+-
+ /*
+ * Leave lazy mode, flushing any hypercalls made here.
+ * This must be done before restoring TLS segments so
+@@ -483,13 +467,6 @@ __switch_to(struct task_struct *prev_p,
+ task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
+ __switch_to_xtra(prev_p, next_p, tss);
+
+- /*
+- * Preload the FPU context, now that we've determined that the
+- * task is likely to be using it.
+- */
+- if (preload_fpu)
+- __math_state_restore();
+-
+ return prev_p;
+ }
+
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -717,28 +717,6 @@ asmlinkage void __attribute__((weak)) sm
+ }
+
+ /*
+- * __math_state_restore assumes that cr0.TS is already clear and the
+- * fpu state is all ready for use. Used during context switch.
+- */
+-void __math_state_restore(void)
+-{
+- struct thread_info *thread = current_thread_info();
+- struct task_struct *tsk = thread->task;
+-
+- /*
+- * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+- */
+- if (unlikely(restore_fpu_checking(tsk))) {
+- stts();
+- force_sig(SIGSEGV, tsk);
+- return;
+- }
+-
+- __thread_set_has_fpu(thread); /* clts in caller! */
+- tsk->fpu_counter++;
+-}
+-
+-/*
+ * 'math_state_restore()' saves the current math information in the
+ * old math state array, and gets the new ones from the current task
+ *
+@@ -768,9 +746,18 @@ void math_state_restore(void)
+ local_irq_disable();
+ }
+
+- clts(); /* Allow maths ops (or we recurse) */
++ __thread_fpu_begin(thread);
+
+- __math_state_restore();
++ /*
++ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
++ */
++ if (unlikely(restore_fpu_checking(tsk))) {
++ __thread_fpu_end(thread);
++ force_sig(SIGSEGV, tsk);
++ return;
++ }
++
++ tsk->fpu_counter++;
+ }
+ EXPORT_SYMBOL_GPL(math_state_restore);
+
--- /dev/null
+From 6d59d7a9f5b723a7ac1925c136e93ec83c0c3043 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Thu, 16 Feb 2012 13:33:12 -0800
+Subject: i387: don't ever touch TS_USEDFPU directly, use helper functions
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 6d59d7a9f5b723a7ac1925c136e93ec83c0c3043 upstream.
+
+This creates three helper functions that do the TS_USEDFPU accesses, and
+makes everybody that used to do it by hand use those helpers instead.
+
+In addition, there's a couple of helper functions for the "change both
+CR0.TS and TS_USEDFPU at the same time" case, and the places that do
+that together have been changed to use those. That means that we have
+fewer random places that open-code this situation.
+
+The intent is partly to clarify the code without actually changing any
+semantics yet (since we clearly still have some hard to reproduce bug in
+this area), but also to make it much easier to use another approach
+entirely to caching the CR0.TS bit for software accesses.
+
+Right now we use a bit in the thread-info 'status' variable (this patch
+does not change that), but we might want to make it a full field of its
+own or even make it a per-cpu variable.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/i387.h | 75 ++++++++++++++++++++++++++++++++------------
+ arch/x86/kernel/traps.c | 2 -
+ arch/x86/kernel/xsave.c | 2 -
+ arch/x86/kvm/vmx.c | 2 -
+ 4 files changed, 58 insertions(+), 23 deletions(-)
+
+--- a/arch/x86/include/asm/i387.h
++++ b/arch/x86/include/asm/i387.h
+@@ -280,6 +280,47 @@ static inline int restore_fpu_checking(s
+ }
+
+ /*
++ * Software FPU state helpers. Careful: these need to
++ * be preemption protection *and* they need to be
++ * properly paired with the CR0.TS changes!
++ */
++static inline int __thread_has_fpu(struct thread_info *ti)
++{
++ return ti->status & TS_USEDFPU;
++}
++
++/* Must be paired with an 'stts' after! */
++static inline void __thread_clear_has_fpu(struct thread_info *ti)
++{
++ ti->status &= ~TS_USEDFPU;
++}
++
++/* Must be paired with a 'clts' before! */
++static inline void __thread_set_has_fpu(struct thread_info *ti)
++{
++ ti->status |= TS_USEDFPU;
++}
++
++/*
++ * Encapsulate the CR0.TS handling together with the
++ * software flag.
++ *
++ * These generally need preemption protection to work,
++ * do try to avoid using these on their own.
++ */
++static inline void __thread_fpu_end(struct thread_info *ti)
++{
++ __thread_clear_has_fpu(ti);
++ stts();
++}
++
++static inline void __thread_fpu_begin(struct thread_info *ti)
++{
++ clts();
++ __thread_set_has_fpu(ti);
++}
++
++/*
+ * Signal frame handlers...
+ */
+ extern int save_i387_xstate(void __user *buf);
+@@ -287,23 +328,21 @@ extern int restore_i387_xstate(void __us
+
+ static inline void __unlazy_fpu(struct task_struct *tsk)
+ {
+- if (task_thread_info(tsk)->status & TS_USEDFPU) {
++ if (__thread_has_fpu(task_thread_info(tsk))) {
+ __save_init_fpu(tsk);
+- task_thread_info(tsk)->status &= ~TS_USEDFPU;
+- stts();
++ __thread_fpu_end(task_thread_info(tsk));
+ } else
+ tsk->fpu_counter = 0;
+ }
+
+ static inline void __clear_fpu(struct task_struct *tsk)
+ {
+- if (task_thread_info(tsk)->status & TS_USEDFPU) {
++ if (__thread_has_fpu(task_thread_info(tsk))) {
+ /* Ignore delayed exceptions from user space */
+ asm volatile("1: fwait\n"
+ "2:\n"
+ _ASM_EXTABLE(1b, 2b));
+- task_thread_info(tsk)->status &= ~TS_USEDFPU;
+- stts();
++ __thread_fpu_end(task_thread_info(tsk));
+ }
+ }
+
+@@ -311,14 +350,14 @@ static inline void __clear_fpu(struct ta
+ * Were we in an interrupt that interrupted kernel mode?
+ *
+ * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+- * pair does nothing at all: TS_USEDFPU must be clear (so
++ * pair does nothing at all: the thread must not have fpu (so
+ * that we don't try to save the FPU state), and TS must
+ * be set (so that the clts/stts pair does nothing that is
+ * visible in the interrupted kernel thread).
+ */
+ static inline bool interrupted_kernel_fpu_idle(void)
+ {
+- return !(current_thread_info()->status & TS_USEDFPU) &&
++ return !__thread_has_fpu(current_thread_info()) &&
+ (read_cr0() & X86_CR0_TS);
+ }
+
+@@ -356,9 +395,9 @@ static inline void kernel_fpu_begin(void
+
+ WARN_ON_ONCE(!irq_fpu_usable());
+ preempt_disable();
+- if (me->status & TS_USEDFPU) {
++ if (__thread_has_fpu(me)) {
+ __save_init_fpu(me->task);
+- me->status &= ~TS_USEDFPU;
++ __thread_clear_has_fpu(me);
+ /* We do 'stts()' in kernel_fpu_end() */
+ } else
+ clts();
+@@ -422,24 +461,21 @@ static inline void irq_ts_restore(int TS
+ */
+ static inline int user_has_fpu(void)
+ {
+- return current_thread_info()->status & TS_USEDFPU;
++ return __thread_has_fpu(current_thread_info());
+ }
+
+ static inline void user_fpu_end(void)
+ {
+ preempt_disable();
+- current_thread_info()->status &= ~TS_USEDFPU;
+- stts();
++ __thread_fpu_end(current_thread_info());
+ preempt_enable();
+ }
+
+ static inline void user_fpu_begin(void)
+ {
+ preempt_disable();
+- if (!user_has_fpu()) {
+- clts();
+- current_thread_info()->status |= TS_USEDFPU;
+- }
++ if (!user_has_fpu())
++ __thread_fpu_begin(current_thread_info());
+ preempt_enable();
+ }
+
+@@ -448,11 +484,10 @@ static inline void user_fpu_begin(void)
+ */
+ static inline void save_init_fpu(struct task_struct *tsk)
+ {
+- WARN_ON_ONCE(!(task_thread_info(tsk)->status & TS_USEDFPU));
++ WARN_ON_ONCE(!__thread_has_fpu(task_thread_info(tsk)));
+ preempt_disable();
+ __save_init_fpu(tsk);
+- task_thread_info(tsk)->status &= ~TS_USEDFPU;
+- stts();
++ __thread_fpu_end(task_thread_info(tsk));
+ preempt_enable();
+ }
+
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -734,7 +734,7 @@ void __math_state_restore(void)
+ return;
+ }
+
+- thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
++ __thread_set_has_fpu(thread); /* clts in caller! */
+ tsk->fpu_counter++;
+ }
+
+--- a/arch/x86/kernel/xsave.c
++++ b/arch/x86/kernel/xsave.c
+@@ -47,7 +47,7 @@ void __sanitize_i387_state(struct task_s
+ if (!fx)
+ return;
+
+- BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU);
++ BUG_ON(__thread_has_fpu(task_thread_info(tsk)));
+
+ xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -948,7 +948,7 @@ static void __vmx_load_host_state(struct
+ #ifdef CONFIG_X86_64
+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ #endif
+- if (current_thread_info()->status & TS_USEDFPU)
++ if (__thread_has_fpu(current_thread_info()))
+ clts();
+ load_gdt(&__get_cpu_var(host_gdt));
+ }
--- /dev/null
+From c38e23456278e967f094b08247ffc3711b1029b2 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Wed, 15 Feb 2012 08:05:18 -0800
+Subject: i387: fix sense of sanity check
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit c38e23456278e967f094b08247ffc3711b1029b2 upstream.
+
+The check for save_init_fpu() (introduced in commit 5b1cbac37798: "i387:
+make irq_fpu_usable() tests more robust") was the wrong way around, but
+I hadn't noticed, because my "tests" were bogus: the FPU exceptions are
+disabled by default, so even doing a divide by zero never actually
+triggers this code at all unless you do extra work to enable them.
+
+So if anybody did enable them, they'd get one spurious warning.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/i387.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/i387.h
++++ b/arch/x86/include/asm/i387.h
+@@ -404,7 +404,7 @@ static inline void irq_ts_restore(int TS
+ */
+ static inline void save_init_fpu(struct task_struct *tsk)
+ {
+- WARN_ON_ONCE(task_thread_info(tsk)->status & TS_USEDFPU);
++ WARN_ON_ONCE(!(task_thread_info(tsk)->status & TS_USEDFPU));
+ preempt_disable();
+ __save_init_fpu(tsk);
+ stts();
--- /dev/null
+From 15d8791cae75dca27bfda8ecfe87dca9379d6bb0 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Thu, 16 Feb 2012 09:15:04 -0800
+Subject: i387: fix x86-64 preemption-unsafe user stack save/restore
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 15d8791cae75dca27bfda8ecfe87dca9379d6bb0 upstream.
+
+Commit 5b1cbac37798 ("i387: make irq_fpu_usable() tests more robust")
+added a sanity check to the #NM handler to verify that we never cause
+the "Device Not Available" exception in kernel mode.
+
+However, that check actually pinpointed a (fundamental) race where we do
+cause that exception as part of the signal stack FPU state save/restore
+code.
+
+Because we use the floating point instructions themselves to save and
+restore state directly from user mode, we cannot do that atomically with
+testing the TS_USEDFPU bit: the user mode access itself may cause a page
+fault, which causes a task switch, which saves and restores the FP/MMX
+state from the kernel buffers.
+
+This kind of "recursive" FP state save is fine per se, but it means that
+when the signal stack save/restore gets restarted, it will now take the
+'#NM' exception we originally tried to avoid. With preemption this can
+happen even without the page fault - but because of the user access, we
+cannot just disable preemption around the save/restore instruction.
+
+There are various ways to solve this, including using the
+"enable/disable_page_fault()" helpers to not allow page faults at all
+during the sequence, and fall back to copying things by hand without the
+use of the native FP state save/restore instructions.
+
+However, the simplest thing to do is to just allow the #NM from kernel
+space, but fix the race in setting and clearing CR0.TS that this all
+exposed: the TS bit changes and the TS_USEDFPU bit absolutely have to be
+atomic wrt scheduling, so while the actual state save/restore can be
+interrupted and restarted, the act of actually clearing/setting CR0.TS
+and the TS_USEDFPU bit together must not.
+
+Instead of just adding random "preempt_disable/enable()" calls to what
+is already excessively ugly code, this introduces some helper functions
+that mostly mirror the "kernel_fpu_begin/end()" functionality, just for
+the user state instead.
+
+Those helper functions should probably eventually replace the other
+ad-hoc CR0.TS and TS_USEDFPU tests too, but I'll need to think about it
+some more: the task switching functionality in particular needs to
+expose the difference between the 'prev' and 'next' threads, while the
+new helper functions intentionally were written to only work with
+'current'.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/i387.h | 42 ++++++++++++++++++++++++++++++++++++++++++
+ arch/x86/kernel/traps.c | 1 -
+ arch/x86/kernel/xsave.c | 10 +++-------
+ 3 files changed, 45 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/include/asm/i387.h
++++ b/arch/x86/include/asm/i387.h
+@@ -400,6 +400,48 @@ static inline void irq_ts_restore(int TS
+ }
+
+ /*
++ * The question "does this thread have fpu access?"
++ * is slightly racy, since preemption could come in
++ * and revoke it immediately after the test.
++ *
++ * However, even in that very unlikely scenario,
++ * we can just assume we have FPU access - typically
++ * to save the FP state - we'll just take a #NM
++ * fault and get the FPU access back.
++ *
++ * The actual user_fpu_begin/end() functions
++ * need to be preemption-safe, though.
++ *
++ * NOTE! user_fpu_end() must be used only after you
++ * have saved the FP state, and user_fpu_begin() must
++ * be used only immediately before restoring it.
++ * These functions do not do any save/restore on
++ * their own.
++ */
++static inline int user_has_fpu(void)
++{
++ return current_thread_info()->status & TS_USEDFPU;
++}
++
++static inline void user_fpu_end(void)
++{
++ preempt_disable();
++ current_thread_info()->status &= ~TS_USEDFPU;
++ stts();
++ preempt_enable();
++}
++
++static inline void user_fpu_begin(void)
++{
++ preempt_disable();
++ if (!user_has_fpu()) {
++ clts();
++ current_thread_info()->status |= TS_USEDFPU;
++ }
++ preempt_enable();
++}
++
++/*
+ * These disable preemption on their own and are safe
+ */
+ static inline void save_init_fpu(struct task_struct *tsk)
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -777,7 +777,6 @@ EXPORT_SYMBOL_GPL(math_state_restore);
+ dotraplinkage void __kprobes
+ do_device_not_available(struct pt_regs *regs, long error_code)
+ {
+- WARN_ON_ONCE(!user_mode_vm(regs));
+ #ifdef CONFIG_MATH_EMULATION
+ if (read_cr0() & X86_CR0_EM) {
+ struct math_emu_info info = { };
+--- a/arch/x86/kernel/xsave.c
++++ b/arch/x86/kernel/xsave.c
+@@ -168,7 +168,7 @@ int save_i387_xstate(void __user *buf)
+ if (!used_math())
+ return 0;
+
+- if (task_thread_info(tsk)->status & TS_USEDFPU) {
++ if (user_has_fpu()) {
+ if (use_xsave())
+ err = xsave_user(buf);
+ else
+@@ -176,8 +176,7 @@ int save_i387_xstate(void __user *buf)
+
+ if (err)
+ return err;
+- task_thread_info(tsk)->status &= ~TS_USEDFPU;
+- stts();
++ user_fpu_end();
+ } else {
+ sanitize_i387_state(tsk);
+ if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
+@@ -292,10 +291,7 @@ int restore_i387_xstate(void __user *buf
+ return err;
+ }
+
+- if (!(task_thread_info(current)->status & TS_USEDFPU)) {
+- clts();
+- task_thread_info(current)->status |= TS_USEDFPU;
+- }
++ user_fpu_begin();
+ if (use_xsave())
+ err = restore_user_xstate(buf);
+ else
--- /dev/null
+From 5b1cbac37798805c1fee18c8cebe5c0a13975b17 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Mon, 13 Feb 2012 13:56:14 -0800
+Subject: i387: make irq_fpu_usable() tests more robust
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 5b1cbac37798805c1fee18c8cebe5c0a13975b17 upstream.
+
+Some code - especially the crypto layer - wants to use the x86
+FP/MMX/AVX register set in what may be interrupt (typically softirq)
+context.
+
+That *can* be ok, but the tests for when it was ok were somewhat
+suspect. We cannot touch the thread-specific status bits either, so
+we'd better check that we're not going to try to save FP state or
+anything like that.
+
+Now, it may be that the TS bit is always cleared *before* we set the
+USEDFPU bit (and only set when we had already cleared the USEDFP
+before), so the TS bit test may actually have been sufficient, but it
+certainly was not obviously so.
+
+So this explicitly verifies that we will not touch the TS_USEDFPU bit,
+and adds a few related sanity-checks. Because it seems that somehow
+AES-NI is corrupting user FP state. The cause is not clear, and this
+patch doesn't fix it, but while debugging it I really wanted the code to
+be more obviously correct and robust.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/i387.h | 54 +++++++++++++++++++++++++++++++++++++-------
+ arch/x86/kernel/traps.c | 1
+ 2 files changed, 47 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/include/asm/i387.h
++++ b/arch/x86/include/asm/i387.h
+@@ -307,9 +307,54 @@ static inline void __clear_fpu(struct ta
+ }
+ }
+
++/*
++ * Were we in an interrupt that interrupted kernel mode?
++ *
++ * We can do a kernel_fpu_begin/end() pair *ONLY* if that
++ * pair does nothing at all: TS_USEDFPU must be clear (so
++ * that we don't try to save the FPU state), and TS must
++ * be set (so that the clts/stts pair does nothing that is
++ * visible in the interrupted kernel thread).
++ */
++static inline bool interrupted_kernel_fpu_idle(void)
++{
++ return !(current_thread_info()->status & TS_USEDFPU) &&
++ (read_cr0() & X86_CR0_TS);
++}
++
++/*
++ * Were we in user mode (or vm86 mode) when we were
++ * interrupted?
++ *
++ * Doing kernel_fpu_begin/end() is ok if we are running
++ * in an interrupt context from user mode - we'll just
++ * save the FPU state as required.
++ */
++static inline bool interrupted_user_mode(void)
++{
++ struct pt_regs *regs = get_irq_regs();
++ return regs && user_mode_vm(regs);
++}
++
++/*
++ * Can we use the FPU in kernel mode with the
++ * whole "kernel_fpu_begin/end()" sequence?
++ *
++ * It's always ok in process context (ie "not interrupt")
++ * but it is sometimes ok even from an irq.
++ */
++static inline bool irq_fpu_usable(void)
++{
++ return !in_interrupt() ||
++ interrupted_user_mode() ||
++ interrupted_kernel_fpu_idle();
++}
++
+ static inline void kernel_fpu_begin(void)
+ {
+ struct thread_info *me = current_thread_info();
++
++ WARN_ON_ONCE(!irq_fpu_usable());
+ preempt_disable();
+ if (me->status & TS_USEDFPU)
+ __save_init_fpu(me->task);
+@@ -323,14 +368,6 @@ static inline void kernel_fpu_end(void)
+ preempt_enable();
+ }
+
+-static inline bool irq_fpu_usable(void)
+-{
+- struct pt_regs *regs;
+-
+- return !in_interrupt() || !(regs = get_irq_regs()) || \
+- user_mode(regs) || (read_cr0() & X86_CR0_TS);
+-}
+-
+ /*
+ * Some instructions like VIA's padlock instructions generate a spurious
+ * DNA fault but don't modify SSE registers. And these instructions
+@@ -367,6 +404,7 @@ static inline void irq_ts_restore(int TS
+ */
+ static inline void save_init_fpu(struct task_struct *tsk)
+ {
++ WARN_ON_ONCE(task_thread_info(tsk)->status & TS_USEDFPU);
+ preempt_disable();
+ __save_init_fpu(tsk);
+ stts();
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -777,6 +777,7 @@ EXPORT_SYMBOL_GPL(math_state_restore);
+ dotraplinkage void __kprobes
+ do_device_not_available(struct pt_regs *regs, long error_code)
+ {
++ WARN_ON_ONCE(!user_mode_vm(regs));
+ #ifdef CONFIG_MATH_EMULATION
+ if (read_cr0() & X86_CR0_EM) {
+ struct math_emu_info info = { };
--- /dev/null
+From be98c2cdb15ba26148cd2bd58a857d4f7759ed38 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Mon, 13 Feb 2012 13:47:25 -0800
+Subject: i387: math_state_restore() isn't called from asm
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit be98c2cdb15ba26148cd2bd58a857d4f7759ed38 upstream.
+
+It was marked asmlinkage for some really old and stale legacy reasons.
+Fix that and the equally stale comment.
+
+Noticed when debugging the irq_fpu_usable() bugs.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/i387.h | 2 +-
+ arch/x86/kernel/traps.c | 6 +++---
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/i387.h
++++ b/arch/x86/include/asm/i387.h
+@@ -29,7 +29,7 @@ extern unsigned int sig_xstate_size;
+ extern void fpu_init(void);
+ extern void mxcsr_feature_mask_init(void);
+ extern int init_fpu(struct task_struct *child);
+-extern asmlinkage void math_state_restore(void);
++extern void math_state_restore(void);
+ extern void __math_state_restore(void);
+ extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
+
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -745,10 +745,10 @@ void __math_state_restore(void)
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+ * Don't touch unless you *really* know how it works.
+ *
+- * Must be called with kernel preemption disabled (in this case,
+- * local interrupts are disabled at the call-site in entry.S).
++ * Must be called with kernel preemption disabled (eg with local
++ * local interrupts as in the case of do_device_not_available).
+ */
+-asmlinkage void math_state_restore(void)
++void math_state_restore(void)
+ {
+ struct thread_info *thread = current_thread_info();
+ struct task_struct *tsk = thread->task;
--- /dev/null
+From b6c66418dcad0fcf83cd1d0a39482db37bf4fc41 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Thu, 16 Feb 2012 12:22:48 -0800
+Subject: i387: move TS_USEDFPU clearing out of __save_init_fpu and into callers
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit b6c66418dcad0fcf83cd1d0a39482db37bf4fc41 upstream.
+
+Touching TS_USEDFPU without touching CR0.TS is confusing, so don't do
+it. By moving it into the callers, we always do the TS_USEDFPU next to
+the CR0.TS accesses in the source code, and it's much easier to see how
+the two go hand in hand.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/i387.h | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/i387.h
++++ b/arch/x86/include/asm/i387.h
+@@ -259,7 +259,6 @@ static inline void fpu_save_init(struct
+ static inline void __save_init_fpu(struct task_struct *tsk)
+ {
+ fpu_save_init(&tsk->thread.fpu);
+- task_thread_info(tsk)->status &= ~TS_USEDFPU;
+ }
+
+ static inline int fpu_fxrstor_checking(struct fpu *fpu)
+@@ -290,6 +289,7 @@ static inline void __unlazy_fpu(struct t
+ {
+ if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ __save_init_fpu(tsk);
++ task_thread_info(tsk)->status &= ~TS_USEDFPU;
+ stts();
+ } else
+ tsk->fpu_counter = 0;
+@@ -356,9 +356,11 @@ static inline void kernel_fpu_begin(void
+
+ WARN_ON_ONCE(!irq_fpu_usable());
+ preempt_disable();
+- if (me->status & TS_USEDFPU)
++ if (me->status & TS_USEDFPU) {
+ __save_init_fpu(me->task);
+- else
++ me->status &= ~TS_USEDFPU;
++ /* We do 'stts()' in kernel_fpu_end() */
++ } else
+ clts();
+ }
+
+@@ -449,6 +451,7 @@ static inline void save_init_fpu(struct
+ WARN_ON_ONCE(!(task_thread_info(tsk)->status & TS_USEDFPU));
+ preempt_disable();
+ __save_init_fpu(tsk);
++ task_thread_info(tsk)->status &= ~TS_USEDFPU;
+ stts();
+ preempt_enable();
+ }
xhci-fix-oops-caused-by-more-usb2-ports-than-usb3-ports.patch
xhci-fix-encoding-for-hs-bulk-control-nak-rate.patch
usb-set-hub-depth-after-usb3-hub-reset.patch
+i387-math_state_restore-isn-t-called-from-asm.patch
+i387-make-irq_fpu_usable-tests-more-robust.patch
+i387-fix-sense-of-sanity-check.patch
+i387-fix-x86-64-preemption-unsafe-user-stack-save-restore.patch
+i387-move-ts_usedfpu-clearing-out-of-__save_init_fpu-and-into-callers.patch
+i387-don-t-ever-touch-ts_usedfpu-directly-use-helper-functions.patch
+i387-do-not-preload-fpu-state-at-task-switch-time.patch