--- /dev/null
+From f94edacf998516ac9d849f7bc6949a703977a7f3 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Fri, 17 Feb 2012 21:48:54 -0800
+Subject: i387: move TS_USEDFPU flag from thread_info to task_struct
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit f94edacf998516ac9d849f7bc6949a703977a7f3 upstream.
+
+This moves the bit that indicates whether a thread has ownership of the
+FPU from the TS_USEDFPU bit in thread_info->status to a word of its own
+(called 'has_fpu') in task_struct->thread.has_fpu.
+
+This fixes two independent bugs at the same time:
+
+ - changing 'thread_info->status' from the scheduler causes nasty
+ problems for the other users of that variable, since it is defined to
+ be thread-synchronous (that's what the "TS_" part of the naming was
+ supposed to indicate).
+
+ So perfectly valid code could (and did) do
+
+ ti->status |= TS_RESTORE_SIGMASK;
+
+ and the compiler was free to do that as separate load, or, and store
+ instructions, which can cause problems with preemption: a task
+ switch could happen in between and change the TS_USEDFPU bit, and
+ that change would then be overwritten by the final store (see the
+ sketch right after this list).
+
+ In practice, this seldom happened, though, because the 'status' field
+ was seldom used more than once, so gcc would generally tend to
+ generate code that used a read-modify-write instruction and thus
+ happened to avoid this problem - RMW instructions are naturally low
+ fat and preemption-safe.
+
+ - On x86-32, the current_thread_info() pointer would, during interrupts
+ and softirqs, point to a *copy* of the real thread_info, because
+ x86-32 uses %esp to calculate the thread_info address, and thus the
+ separate irq (and softirq) stacks would cause these kinds of odd
+ thread_info copy aliases.
+
+ This is normally not a problem, since interrupts aren't supposed to
+ look at thread information anyway (what thread is running at
+ interrupt time really isn't very well-defined), but it confused the
+ heck out of irq_fpu_usable() and the code that tried to squirrel
+ away the FPU state.
+
+ (It also caused untold confusion for us poor kernel developers).
+
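+To make the first problem concrete, here is a purely illustrative
+sketch (not code from this patch) of one legal, and unlucky, way for
+that read-modify-write to be carried out:
+
+    /*
+     * Illustrative only: "ti->status |= TS_RESTORE_SIGMASK" done as
+     * three separate steps, losing a concurrent TS_USEDFPU change
+     * made at a preemption point in the middle.
+     */
+    __u32 status = ti->status;      /* load: TS_USEDFPU still set here */
+                                    /* <- preemption: the scheduler
+                                     *    clears TS_USEDFPU for this task */
+    status |= TS_RESTORE_SIGMASK;   /* modify the now-stale local copy */
+    ti->status = status;            /* store: TS_USEDFPU is set again  */
+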
+It also turns out that using 'task_struct' is actually much more natural
+for most of the call sites that care about the FPU state, since they
+tend to work with the task struct for other reasons anyway (i.e.
+scheduling). And the FPU data that we are going to save/restore is
+found there too.
+
+Thanks to Arjan van de Ven <arjan@linux.intel.com> for pointing us to
+the %esp issue.
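+
+For reference, the x86-32 thread_info lookup that causes the aliasing
+is roughly the following (schematic, not the exact kernel source):
+
+    /* how x86-32 gets the current stack pointer from C */
+    register unsigned long current_stack_pointer asm("esp") __used;
+
+    /*
+     * x86-32 derives thread_info from whatever stack the CPU is
+     * currently running on, by masking the stack pointer:
+     */
+    static inline struct thread_info *current_thread_info(void)
+    {
+        return (struct thread_info *)
+            (current_stack_pointer & ~(THREAD_SIZE - 1));
+    }
+
+The separate hardirq/softirq stacks carry their own thread_info at
+their base, with only a few fields copied over from the task's real
+one, so in interrupt context this returns that copy - and any
+TS_USEDFPU state read through it is not the task's real state.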
+
+Cc: Arjan van de Ven <arjan@linux.intel.com>
+Reported-and-tested-by: Raphael Prevost <raphael@buro.asia>
+Acked-and-tested-by: Suresh Siddha <suresh.b.siddha@intel.com>
+Tested-by: Peter Anvin <hpa@zytor.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/i387.h | 44 ++++++++++++++++++-------------------
+ arch/x86/include/asm/processor.h | 1
+ arch/x86/include/asm/thread_info.h | 2 -
+ arch/x86/kernel/traps.c | 11 ++++-----
+ arch/x86/kernel/xsave.c | 2 -
+ arch/x86/kvm/vmx.c | 2 -
+ 6 files changed, 30 insertions(+), 32 deletions(-)
+
+--- a/arch/x86/include/asm/i387.h
++++ b/arch/x86/include/asm/i387.h
+@@ -264,21 +264,21 @@ static inline int restore_fpu_checking(s
+ * be preemption protection *and* they need to be
+ * properly paired with the CR0.TS changes!
+ */
+-static inline int __thread_has_fpu(struct thread_info *ti)
++static inline int __thread_has_fpu(struct task_struct *tsk)
+ {
+- return ti->status & TS_USEDFPU;
++ return tsk->thread.has_fpu;
+ }
+
+ /* Must be paired with an 'stts' after! */
+-static inline void __thread_clear_has_fpu(struct thread_info *ti)
++static inline void __thread_clear_has_fpu(struct task_struct *tsk)
+ {
+- ti->status &= ~TS_USEDFPU;
++ tsk->thread.has_fpu = 0;
+ }
+
+ /* Must be paired with a 'clts' before! */
+-static inline void __thread_set_has_fpu(struct thread_info *ti)
++static inline void __thread_set_has_fpu(struct task_struct *tsk)
+ {
+- ti->status |= TS_USEDFPU;
++ tsk->thread.has_fpu = 1;
+ }
+
+ /*
+@@ -288,16 +288,16 @@ static inline void __thread_set_has_fpu(
+ * These generally need preemption protection to work,
+ * do try to avoid using these on their own.
+ */
+-static inline void __thread_fpu_end(struct thread_info *ti)
++static inline void __thread_fpu_end(struct task_struct *tsk)
+ {
+- __thread_clear_has_fpu(ti);
++ __thread_clear_has_fpu(tsk);
+ stts();
+ }
+
+-static inline void __thread_fpu_begin(struct thread_info *ti)
++static inline void __thread_fpu_begin(struct task_struct *tsk)
+ {
+ clts();
+- __thread_set_has_fpu(ti);
++ __thread_set_has_fpu(tsk);
+ }
+
+ /*
+@@ -308,21 +308,21 @@ extern int restore_i387_xstate(void __us
+
+ static inline void __unlazy_fpu(struct task_struct *tsk)
+ {
+- if (__thread_has_fpu(task_thread_info(tsk))) {
++ if (__thread_has_fpu(tsk)) {
+ __save_init_fpu(tsk);
+- __thread_fpu_end(task_thread_info(tsk));
++ __thread_fpu_end(tsk);
+ } else
+ tsk->fpu_counter = 0;
+ }
+
+ static inline void __clear_fpu(struct task_struct *tsk)
+ {
+- if (__thread_has_fpu(task_thread_info(tsk))) {
++ if (__thread_has_fpu(tsk)) {
+ /* Ignore delayed exceptions from user space */
+ asm volatile("1: fwait\n"
+ "2:\n"
+ _ASM_EXTABLE(1b, 2b));
+- __thread_fpu_end(task_thread_info(tsk));
++ __thread_fpu_end(tsk);
+ }
+ }
+
+@@ -337,7 +337,7 @@ static inline void __clear_fpu(struct ta
+ */
+ static inline bool interrupted_kernel_fpu_idle(void)
+ {
+- return !__thread_has_fpu(current_thread_info()) &&
++ return !__thread_has_fpu(current) &&
+ (read_cr0() & X86_CR0_TS);
+ }
+
+@@ -371,12 +371,12 @@ static inline bool irq_fpu_usable(void)
+
+ static inline void kernel_fpu_begin(void)
+ {
+- struct thread_info *me = current_thread_info();
++ struct task_struct *me = current;
+
+ WARN_ON_ONCE(!irq_fpu_usable());
+ preempt_disable();
+ if (__thread_has_fpu(me)) {
+- __save_init_fpu(me->task);
++ __save_init_fpu(me);
+ __thread_clear_has_fpu(me);
+ /* We do 'stts()' in kernel_fpu_end() */
+ } else
+@@ -441,13 +441,13 @@ static inline void irq_ts_restore(int TS
+ */
+ static inline int user_has_fpu(void)
+ {
+- return __thread_has_fpu(current_thread_info());
++ return __thread_has_fpu(current);
+ }
+
+ static inline void user_fpu_end(void)
+ {
+ preempt_disable();
+- __thread_fpu_end(current_thread_info());
++ __thread_fpu_end(current);
+ preempt_enable();
+ }
+
+@@ -455,7 +455,7 @@ static inline void user_fpu_begin(void)
+ {
+ preempt_disable();
+ if (!user_has_fpu())
+- __thread_fpu_begin(current_thread_info());
++ __thread_fpu_begin(current);
+ preempt_enable();
+ }
+
+@@ -464,10 +464,10 @@ static inline void user_fpu_begin(void)
+ */
+ static inline void save_init_fpu(struct task_struct *tsk)
+ {
+- WARN_ON_ONCE(!__thread_has_fpu(task_thread_info(tsk)));
++ WARN_ON_ONCE(!__thread_has_fpu(tsk));
+ preempt_disable();
+ __save_init_fpu(tsk);
+- __thread_fpu_end(task_thread_info(tsk));
++ __thread_fpu_end(tsk);
+ preempt_enable();
+ }
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -456,6 +456,7 @@ struct thread_struct {
+ unsigned long trap_no;
+ unsigned long error_code;
+ /* floating point and extended processor state */
++ unsigned long has_fpu;
+ struct fpu fpu;
+ #ifdef CONFIG_X86_32
+ /* Virtual 86 mode info */
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -242,8 +242,6 @@ static inline struct thread_info *curren
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+-#define TS_USEDFPU 0x0001 /* FPU was used by this task
+- this quantum (SMP) */
+ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
+ #define TS_POLLING 0x0004 /* idle task polling need_resched,
+ skip sending interrupt */
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -573,12 +573,11 @@ asmlinkage void __attribute__((weak)) sm
+ */
+ void math_state_restore(void)
+ {
+- struct thread_info *thread = current_thread_info();
+- struct task_struct *tsk = thread->task;
++ struct task_struct *tsk = current;
+
+ /* We need a safe address that is cheap to find and that is already
+- in L1. We just brought in "thread->task", so use that */
+-#define safe_address (thread->task)
++ in L1. We're just bringing in "tsk->thread.has_fpu", so use that */
++#define safe_address (tsk->thread.has_fpu)
+
+ if (!tsk_used_math(tsk)) {
+ local_irq_enable();
+@@ -595,7 +594,7 @@ void math_state_restore(void)
+ local_irq_disable();
+ }
+
+- __thread_fpu_begin(thread);
++ __thread_fpu_begin(tsk);
+
+ /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+ is pending. Clear the x87 state here by setting it to fixed
+@@ -611,7 +610,7 @@ void math_state_restore(void)
+ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+ */
+ if (unlikely(restore_fpu_checking(tsk))) {
+- __thread_fpu_end(thread);
++ __thread_fpu_end(tsk);
+ force_sig(SIGSEGV, tsk);
+ return;
+ }
+--- a/arch/x86/kernel/xsave.c
++++ b/arch/x86/kernel/xsave.c
+@@ -47,7 +47,7 @@ void __sanitize_i387_state(struct task_s
+ if (!fx)
+ return;
+
+- BUG_ON(__thread_has_fpu(task_thread_info(tsk)));
++ BUG_ON(__thread_has_fpu(tsk));
+
+ xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -1456,7 +1456,7 @@ static void __vmx_load_host_state(struct
+ #ifdef CONFIG_X86_64
+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ #endif
+- if (__thread_has_fpu(current_thread_info()))
++ if (__thread_has_fpu(current))
+ clts();
+ load_gdt(&__get_cpu_var(host_gdt));
+ }
--- /dev/null
+From 34ddc81a230b15c0e345b6b253049db731499f7e Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Sat, 18 Feb 2012 12:56:35 -0800
+Subject: i387: re-introduce FPU state preloading at context switch time
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 34ddc81a230b15c0e345b6b253049db731499f7e upstream.
+
+After all the FPU state cleanups and finally finding the problem that
+caused all our FPU save/restore problems, this re-introduces the
+preloading of FPU state that was removed in commit b3b0870ef3ff ("i387:
+do not preload FPU state at task switch time").
+
+However, instead of simply reverting the removal, this reimplements
+preloading with several fixes, most notably
+
+ - properly abstracted as a true FPU state switch, rather than as
+ open-coded save and restore with various hacks.
+
+ In particular, implementing it as a proper FPU state switch allows us
+ to optimize the CR0.TS flag accesses: there is no reason to set the
+ TS bit only to then almost immediately clear it again. CR0 accesses
+ are quite slow and expensive, so don't flip the bit back and forth
+ for no good reason.
+
+ - Make sure that the same model works for both x86-32 and x86-64, so
+ that there are no gratuitous differences between the two arising from
+ the way they save and restore segment state differently, for
+ architectural reasons that really don't matter to the FPU state.
+
+ - Avoid exposing the "preload" state to the context switch routines,
+ and in particular allow the concept of lazy state restore: if nothing
+ else has used the FPU in the meantime, and the process is still on
+ the same CPU, we can avoid restoring state from memory entirely, just
+ re-expose the state that is still in the FPU unit.
+
+ That optimized lazy restore isn't actually implemented here, but the
+ infrastructure is set up for it (a rough sketch of the idea follows
+ below). Of course, older CPUs that use 'fnsave' to save the state
+ cannot take advantage of this, since the state saving also trashes
+ the state.
+
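+As a rough illustration of where that infrastructure could go (the
+'fpu_owner_task' per-cpu variable and the 'fpu.last_cpu' field below
+are hypothetical and not part of this patch), a real
+fpu_lazy_restore() might look something like:
+
+    /*
+     * Hypothetical sketch only - in this patch fpu_lazy_restore()
+     * is hard-wired to 0.  A real implementation would remember
+     * which task last owned the FPU on each CPU, and which CPU a
+     * task's FPU state was last loaded on:
+     */
+    DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+
+    static inline int fpu_lazy_restore(struct task_struct *new)
+    {
+        return new == this_cpu_read(fpu_owner_task) &&
+            new->thread.fpu.last_cpu == smp_processor_id();
+    }
+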
+In other words, there is now an actual _design_ to the FPU state saving,
+rather than just random historical baggage. Hopefully it's easier to
+follow as a result.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/i387.h | 110 ++++++++++++++++++++++++++++++++++++-------
+ arch/x86/kernel/process_32.c | 5 +
+ arch/x86/kernel/process_64.c | 5 +
+ arch/x86/kernel/traps.c | 55 ++++++++++++---------
+ 4 files changed, 133 insertions(+), 42 deletions(-)
+
+--- a/arch/x86/include/asm/i387.h
++++ b/arch/x86/include/asm/i387.h
+@@ -29,6 +29,7 @@ extern unsigned int sig_xstate_size;
+ extern void fpu_init(void);
+ extern void mxcsr_feature_mask_init(void);
+ extern int init_fpu(struct task_struct *child);
++extern void __math_state_restore(struct task_struct *);
+ extern void math_state_restore(void);
+ extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
+
+@@ -212,9 +213,10 @@ static inline void fpu_fxsave(struct fpu
+ #endif /* CONFIG_X86_64 */
+
+ /*
+- * These must be called with preempt disabled
++ * These must be called with preempt disabled. Returns
++ * 'true' if the FPU state is still intact.
+ */
+-static inline void fpu_save_init(struct fpu *fpu)
++static inline int fpu_save_init(struct fpu *fpu)
+ {
+ if (use_xsave()) {
+ fpu_xsave(fpu);
+@@ -223,22 +225,33 @@ static inline void fpu_save_init(struct
+ * xsave header may indicate the init state of the FP.
+ */
+ if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
+- return;
++ return 1;
+ } else if (use_fxsr()) {
+ fpu_fxsave(fpu);
+ } else {
+ asm volatile("fnsave %[fx]; fwait"
+ : [fx] "=m" (fpu->state->fsave));
+- return;
++ return 0;
+ }
+
+- if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
++ /*
++ * If exceptions are pending, we need to clear them so
++ * that we don't randomly get exceptions later.
++ *
++ * FIXME! Is this perhaps only true for the old-style
++ * irq13 case? Maybe we could leave the x87 state
++ * intact otherwise?
++ */
++ if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
+ asm volatile("fnclex");
++ return 0;
++ }
++ return 1;
+ }
+
+-static inline void __save_init_fpu(struct task_struct *tsk)
++static inline int __save_init_fpu(struct task_struct *tsk)
+ {
+- fpu_save_init(&tsk->thread.fpu);
++ return fpu_save_init(&tsk->thread.fpu);
+ }
+
+ static inline int fpu_fxrstor_checking(struct fpu *fpu)
+@@ -301,20 +314,79 @@ static inline void __thread_fpu_begin(st
+ }
+
+ /*
+- * Signal frame handlers...
++ * FPU state switching for scheduling.
++ *
++ * This is a two-stage process:
++ *
++ * - switch_fpu_prepare() saves the old state and
++ * sets the new state of the CR0.TS bit. This is
++ * done within the context of the old process.
++ *
++ * - switch_fpu_finish() restores the new state as
++ * necessary.
+ */
+-extern int save_i387_xstate(void __user *buf);
+-extern int restore_i387_xstate(void __user *buf);
++typedef struct { int preload; } fpu_switch_t;
++
++/*
++ * FIXME! We could do a totally lazy restore, but we need to
++ * add a per-cpu "this was the task that last touched the FPU
++ * on this CPU" variable, and the task needs to have a "I last
++ * touched the FPU on this CPU" and check them.
++ *
++ * We don't do that yet, so "fpu_lazy_restore()" always returns
++ * false, but some day..
++ */
++#define fpu_lazy_restore(tsk) (0)
++#define fpu_lazy_state_intact(tsk) do { } while (0)
+
+-static inline void __unlazy_fpu(struct task_struct *tsk)
++static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new)
+ {
+- if (__thread_has_fpu(tsk)) {
+- __save_init_fpu(tsk);
+- __thread_fpu_end(tsk);
+- } else
+- tsk->fpu_counter = 0;
++ fpu_switch_t fpu;
++
++ fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
++ if (__thread_has_fpu(old)) {
++ if (__save_init_fpu(old))
++ fpu_lazy_state_intact(old);
++ __thread_clear_has_fpu(old);
++ old->fpu_counter++;
++
++ /* Don't change CR0.TS if we just switch! */
++ if (fpu.preload) {
++ __thread_set_has_fpu(new);
++ prefetch(new->thread.fpu.state);
++ } else
++ stts();
++ } else {
++ old->fpu_counter = 0;
++ if (fpu.preload) {
++ if (fpu_lazy_restore(new))
++ fpu.preload = 0;
++ else
++ prefetch(new->thread.fpu.state);
++ __thread_fpu_begin(new);
++ }
++ }
++ return fpu;
++}
++
++/*
++ * By the time this gets called, we've already cleared CR0.TS and
++ * given the process the FPU if we are going to preload the FPU
++ * state - all we need to do is to conditionally restore the register
++ * state itself.
++ */
++static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
++{
++ if (fpu.preload)
++ __math_state_restore(new);
+ }
+
++/*
++ * Signal frame handlers...
++ */
++extern int save_i387_xstate(void __user *buf);
++extern int restore_i387_xstate(void __user *buf);
++
+ static inline void __clear_fpu(struct task_struct *tsk)
+ {
+ if (__thread_has_fpu(tsk)) {
+@@ -474,7 +546,11 @@ static inline void save_init_fpu(struct
+ static inline void unlazy_fpu(struct task_struct *tsk)
+ {
+ preempt_disable();
+- __unlazy_fpu(tsk);
++ if (__thread_has_fpu(tsk)) {
++ __save_init_fpu(tsk);
++ __thread_fpu_end(tsk);
++ } else
++ tsk->fpu_counter = 0;
+ preempt_enable();
+ }
+
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -297,10 +297,11 @@ __switch_to(struct task_struct *prev_p,
+ *next = &next_p->thread;
+ int cpu = smp_processor_id();
+ struct tss_struct *tss = &per_cpu(init_tss, cpu);
++ fpu_switch_t fpu;
+
+ /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
+
+- __unlazy_fpu(prev_p);
++ fpu = switch_fpu_prepare(prev_p, next_p);
+
+ /*
+ * Reload esp0.
+@@ -355,6 +356,8 @@ __switch_to(struct task_struct *prev_p,
+ if (prev->gs | next->gs)
+ lazy_load_gs(next->gs);
+
++ switch_fpu_finish(next_p, fpu);
++
+ percpu_write(current_task, next_p);
+
+ return prev_p;
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -381,8 +381,9 @@ __switch_to(struct task_struct *prev_p,
+ int cpu = smp_processor_id();
+ struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ unsigned fsindex, gsindex;
++ fpu_switch_t fpu;
+
+- __unlazy_fpu(prev_p);
++ fpu = switch_fpu_prepare(prev_p, next_p);
+
+ /*
+ * Reload esp0, LDT and the page table pointer:
+@@ -452,6 +453,8 @@ __switch_to(struct task_struct *prev_p,
+ wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
+ prev->gsindex = gsindex;
+
++ switch_fpu_finish(next_p, fpu);
++
+ /*
+ * Switch the PDA and FPU contexts.
+ */
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -562,6 +562,37 @@ asmlinkage void __attribute__((weak)) sm
+ }
+
+ /*
++ * This gets called with the process already owning the
++ * FPU state, and with CR0.TS cleared. It just needs to
++ * restore the FPU register state.
++ */
++void __math_state_restore(struct task_struct *tsk)
++{
++ /* We need a safe address that is cheap to find and that is already
++ in L1. We've just brought in "tsk->thread.has_fpu", so use that */
++#define safe_address (tsk->thread.has_fpu)
++
++ /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
++ is pending. Clear the x87 state here by setting it to fixed
++ values. safe_address is a random variable that should be in L1 */
++ alternative_input(
++ ASM_NOP8 ASM_NOP2,
++ "emms\n\t" /* clear stack tags */
++ "fildl %P[addr]", /* set F?P to defined value */
++ X86_FEATURE_FXSAVE_LEAK,
++ [addr] "m" (safe_address));
++
++ /*
++ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
++ */
++ if (unlikely(restore_fpu_checking(tsk))) {
++ __thread_fpu_end(tsk);
++ force_sig(SIGSEGV, tsk);
++ return;
++ }
++}
++
++/*
+ * 'math_state_restore()' saves the current math information in the
+ * old math state array, and gets the new ones from the current task
+ *
+@@ -575,10 +606,6 @@ void math_state_restore(void)
+ {
+ struct task_struct *tsk = current;
+
+- /* We need a safe address that is cheap to find and that is already
+- in L1. We're just bringing in "tsk->thread.has_fpu", so use that */
+-#define safe_address (tsk->thread.has_fpu)
+-
+ if (!tsk_used_math(tsk)) {
+ local_irq_enable();
+ /*
+@@ -595,25 +622,7 @@ void math_state_restore(void)
+ }
+
+ __thread_fpu_begin(tsk);
+-
+- /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+- is pending. Clear the x87 state here by setting it to fixed
+- values. safe_address is a random variable that should be in L1 */
+- alternative_input(
+- ASM_NOP8 ASM_NOP2,
+- "emms\n\t" /* clear stack tags */
+- "fildl %P[addr]", /* set F?P to defined value */
+- X86_FEATURE_FXSAVE_LEAK,
+- [addr] "m" (safe_address));
+-
+- /*
+- * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+- */
+- if (unlikely(restore_fpu_checking(tsk))) {
+- __thread_fpu_end(tsk);
+- force_sig(SIGSEGV, tsk);
+- return;
+- }
++ __math_state_restore(tsk);
+
+ tsk->fpu_counter++;
+ }