--- /dev/null
+Subject: be more aggressive about de-activating mm-s under destruction
+From: jbeulich@novell.com
+Patch-mainline: obsolete
+
+... by not only handling the current task on the CPU where arch_exit_mmap()
+gets executed, but also forcing all remote CPUs in mm->cpu_vm_mask to do
+the same.
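+
+As a rough illustration of the idea (not kernel code: the names, the
+user-space setting, and the sequential sweep standing in for the
+smp_call_function_mask() IPIs are all invented for this sketch), forcing
+every remaining user off a dying context before teardown looks like this:
+
+#include <stdatomic.h>
+#include <stdio.h>
+
+#define NCPUS 4
+
+/* refcounted stand-in for struct mm_struct; count plays mm_count */
+struct ctx { atomic_int count; };
+
+static struct ctx default_ctx = { 1 };		/* plays init_mm */
+static _Atomic(struct ctx *) active[NCPUS];	/* plays each CPU's active_mm */
+
+/*
+ * What each cross-CPU callback does: if this CPU still has the dying
+ * context active, retarget it to default_ctx and drop the reference.
+ * Dropping the *last* reference here would be a bug, since the caller
+ * of the teardown still owns one.
+ */
+static void leave_active_ctx(int cpu, struct ctx *dying)
+{
+	struct ctx *expected = dying;
+
+	if (atomic_compare_exchange_strong(&active[cpu], &expected,
+					   &default_ctx)) {
+		atomic_fetch_add(&default_ctx.count, 1);
+		if (atomic_fetch_sub(&dying->count, 1) == 1)
+			fprintf(stderr, "BUG: dropped last reference\n");
+	}
+}
+
+int main(void)
+{
+	struct ctx doomed = { NCPUS + 1 };	/* one ref per CPU + destructor's */
+	int cpu;
+
+	for (cpu = 0; cpu < NCPUS; cpu++)
+		atomic_store(&active[cpu], &doomed);
+
+	for (cpu = 0; cpu < NCPUS; cpu++)	/* the arch_exit_mmap() analogue */
+		leave_active_ctx(cpu, &doomed);
+
+	printf("references left: %d\n", atomic_load(&doomed.count));	/* 1 */
+	return 0;
+}
+
+After the sweep exactly one reference (the destructor's own) must remain,
+which is what the atomic_dec_and_test()/BUG() pair in the hunk below
+asserts on the real mm_count. Note also that the remote handler in the
+patch only trylocks the task's alloc_lock: it runs in interrupt context,
+where spinning on a lock held by the interrupted task would deadlock, so
+a contended CPU is simply skipped.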
+
+Index: head-2008-12-01/arch/x86/mm/pgtable-xen.c
+===================================================================
+--- head-2008-12-01.orig/arch/x86/mm/pgtable-xen.c	2008-12-01 12:13:06.000000000 +0100
++++ head-2008-12-01/arch/x86/mm/pgtable-xen.c	2008-12-01 12:13:16.000000000 +0100
+@@ -418,27 +418,44 @@ void arch_dup_mmap(struct mm_struct *old
+ 	mm_pin(mm);
+ }
+
+-void arch_exit_mmap(struct mm_struct *mm)
++/*
++ * We aggressively remove defunct pgd from cr3. We execute unmap_vmas() *much*
++ * faster this way, as no hypercalls are needed for the page table updates.
++ */
++static void leave_active_mm(struct task_struct *tsk, struct mm_struct *mm)
++	__releases(tsk->alloc_lock)
+ {
+-	struct task_struct *tsk = current;
+-
+-	task_lock(tsk);
+-
+-	/*
+-	 * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
+-	 * *much* faster this way, as no tlb flushes means bigger wrpt batches.
+-	 */
+ 	if (tsk->active_mm == mm) {
+ 		tsk->active_mm = &init_mm;
+ 		atomic_inc(&init_mm.mm_count);
+
+ 		switch_mm(mm, &init_mm, tsk);
+
+-		atomic_dec(&mm->mm_count);
+-		BUG_ON(atomic_read(&mm->mm_count) == 0);
++		if (atomic_dec_and_test(&mm->mm_count))
++			BUG();
+ 	}
+
+ 	task_unlock(tsk);
++}
++
++static void _leave_active_mm(void *mm)
++{
++	struct task_struct *tsk = current;
++
++	if (spin_trylock(&tsk->alloc_lock))
++		leave_active_mm(tsk, mm);
++}
++
++void arch_exit_mmap(struct mm_struct *mm)
++{
++	struct task_struct *tsk = current;
++
++	task_lock(tsk);
++	leave_active_mm(tsk, mm);
++
++	preempt_disable();
++	smp_call_function_mask(mm->cpu_vm_mask, _leave_active_mm, mm, 1);
++	preempt_enable();
+
+ 	if (PagePinned(virt_to_page(mm->pgd))
+ 	    && atomic_read(&mm->mm_count) == 1