--- /dev/null
+From 37868fe113ff2ba814b3b4eb12df214df555f8dc Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 30 Jul 2015 14:31:32 -0700
+Subject: x86/ldt: Make modify_ldt synchronous
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 37868fe113ff2ba814b3b4eb12df214df555f8dc upstream.
+
+modify_ldt() has questionable locking and does not synchronize
+threads. Improve it: redesign the locking and synchronize all
+threads' LDTs using an IPI on all modifications.
+
+This will dramatically slow down modify_ldt in multithreaded
+programs, but there shouldn't be any multithreaded programs that
+care about modify_ldt's performance in the first place.
+
+This fixes some fallout from the CVE-2015-5157 fixes.
+
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: Andrew Cooper <andrew.cooper3@citrix.com>
+Cc: Andy Lutomirski <luto@amacapital.net>
+Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Jan Beulich <jbeulich@suse.com>
+Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Sasha Levin <sasha.levin@oracle.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: security@kernel.org <security@kernel.org>
+Cc: xen-devel <xen-devel@lists.xen.org>
+Link: http://lkml.kernel.org/r/4c6978476782160600471bd865b318db34c7b628.1438291540.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/desc.h | 15 --
+ arch/x86/include/asm/mmu.h | 3
+ arch/x86/include/asm/mmu_context.h | 54 ++++++-
+ arch/x86/kernel/cpu/common.c | 4
+ arch/x86/kernel/cpu/perf_event.c | 12 +
+ arch/x86/kernel/ldt.c | 264 ++++++++++++++++++++-----------------
+ arch/x86/kernel/process_64.c | 4
+ arch/x86/kernel/step.c | 6
+ arch/x86/power/cpu.c | 3
+ 9 files changed, 211 insertions(+), 154 deletions(-)
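+
+[Illustration, not part of the upstream commit: a minimal, hypothetical
+ userspace sketch of the modify_ldt() interface whose behaviour this patch
+ makes synchronous. Field values below are illustrative assumptions only.
+ With this patch applied, the descriptor written here is visible to every
+ thread of the process before the syscall returns, because all CPUs running
+ the mm are sent an IPI.]
+
+    /* Hypothetical example, for illustration only. */
+    #include <asm/ldt.h>        /* struct user_desc */
+    #include <sys/syscall.h>
+    #include <unistd.h>
+    #include <string.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+        struct user_desc desc;
+
+        /* Present, read/write, 32-bit data segment with a 4 GB limit. */
+        memset(&desc, 0, sizeof(desc));
+        desc.entry_number   = 0;        /* first LDT slot */
+        desc.limit          = 0xfffff;
+        desc.seg_32bit      = 1;
+        desc.limit_in_pages = 1;
+        desc.useable        = 1;
+
+        /* 0x11 == write an LDT entry in "new mode"; no glibc wrapper exists. */
+        if (syscall(SYS_modify_ldt, 0x11, &desc, sizeof(desc)) != 0) {
+            perror("modify_ldt");
+            return 1;
+        }
+        printf("LDT entry 0 installed\n");
+        return 0;
+    }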
+
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -280,21 +280,6 @@ static inline void clear_LDT(void)
+ set_ldt(NULL, 0);
+ }
+
+-/*
+- * load one particular LDT into the current CPU
+- */
+-static inline void load_LDT_nolock(mm_context_t *pc)
+-{
+- set_ldt(pc->ldt, pc->size);
+-}
+-
+-static inline void load_LDT(mm_context_t *pc)
+-{
+- preempt_disable();
+- load_LDT_nolock(pc);
+- preempt_enable();
+-}
+-
+ static inline unsigned long get_desc_base(const struct desc_struct *desc)
+ {
+ return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+--- a/arch/x86/include/asm/mmu.h
++++ b/arch/x86/include/asm/mmu.h
+@@ -9,8 +9,7 @@
+ * we put the segment information here.
+ */
+ typedef struct {
+- void *ldt;
+- int size;
++ struct ldt_struct *ldt;
+
+ #ifdef CONFIG_X86_64
+ /* True if mm supports a task running in 32 bit compatibility mode. */
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -34,6 +34,50 @@ static inline void load_mm_cr4(struct mm
+ #endif
+
+ /*
++ * ldt_structs can be allocated, used, and freed, but they are never
++ * modified while live.
++ */
++struct ldt_struct {
++ /*
++ * Xen requires page-aligned LDTs with special permissions. This is
++ * needed to prevent us from installing evil descriptors such as
++ * call gates. On native, we could merge the ldt_struct and LDT
++ * allocations, but it's not worth trying to optimize.
++ */
++ struct desc_struct *entries;
++ int size;
++};
++
++static inline void load_mm_ldt(struct mm_struct *mm)
++{
++ struct ldt_struct *ldt;
++
++ /* lockless_dereference synchronizes with smp_store_release */
++ ldt = lockless_dereference(mm->context.ldt);
++
++ /*
++ * Any change to mm->context.ldt is followed by an IPI to all
++ * CPUs with the mm active. The LDT will not be freed until
++ * after the IPI is handled by all such CPUs. This means that,
++ * if the ldt_struct changes before we return, the values we see
++ * will be safe, and the new values will be loaded before we run
++ * any user code.
++ *
++ * NB: don't try to convert this to use RCU without extreme care.
++ * We would still need IRQs off, because we don't want to change
++ * the local LDT after an IPI loaded a newer value than the one
++ * that we can see.
++ */
++
++ if (unlikely(ldt))
++ set_ldt(ldt->entries, ldt->size);
++ else
++ clear_LDT();
++
++ DEBUG_LOCKS_WARN_ON(preemptible());
++}
++
++/*
+ * Used for LDT copy/destruction.
+ */
+ int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+@@ -78,12 +122,12 @@ static inline void switch_mm(struct mm_s
+ * was called and then modify_ldt changed
+ * prev->context.ldt but suppressed an IPI to this CPU.
+ * In this case, prev->context.ldt != NULL, because we
+- * never free an LDT while the mm still exists. That
+- * means that next->context.ldt != prev->context.ldt,
+- * because mms never share an LDT.
++ * never set context.ldt to NULL while the mm still
++ * exists. That means that next->context.ldt !=
++ * prev->context.ldt, because mms never share an LDT.
+ */
+ if (unlikely(prev->context.ldt != next->context.ldt))
+- load_LDT_nolock(&next->context);
++ load_mm_ldt(next);
+ }
+ #ifdef CONFIG_SMP
+ else {
+@@ -106,7 +150,7 @@ static inline void switch_mm(struct mm_s
+ load_cr3(next->pgd);
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ load_mm_cr4(next);
+- load_LDT_nolock(&next->context);
++ load_mm_ldt(next);
+ }
+ }
+ #endif
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1434,7 +1434,7 @@ void cpu_init(void)
+ load_sp0(t, &current->thread);
+ set_tss_desc(cpu, t);
+ load_TR_desc();
+- load_LDT(&init_mm.context);
++ load_mm_ldt(&init_mm);
+
+ clear_all_debug_regs();
+ dbg_restore_debug_regs();
+@@ -1483,7 +1483,7 @@ void cpu_init(void)
+ load_sp0(t, thread);
+ set_tss_desc(cpu, t);
+ load_TR_desc();
+- load_LDT(&init_mm.context);
++ load_mm_ldt(&init_mm);
+
+ t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+
+--- a/arch/x86/kernel/cpu/perf_event.c
++++ b/arch/x86/kernel/cpu/perf_event.c
+@@ -2170,21 +2170,25 @@ static unsigned long get_segment_base(un
+ int idx = segment >> 3;
+
+ if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
++ struct ldt_struct *ldt;
++
+ if (idx > LDT_ENTRIES)
+ return 0;
+
+- if (idx > current->active_mm->context.size)
++ /* IRQs are off, so this synchronizes with smp_store_release */
++ ldt = lockless_dereference(current->active_mm->context.ldt);
++ if (!ldt || idx > ldt->size)
+ return 0;
+
+- desc = current->active_mm->context.ldt;
++ desc = &ldt->entries[idx];
+ } else {
+ if (idx > GDT_ENTRIES)
+ return 0;
+
+- desc = raw_cpu_ptr(gdt_page.gdt);
++ desc = raw_cpu_ptr(gdt_page.gdt) + idx;
+ }
+
+- return get_desc_base(desc + idx);
++ return get_desc_base(desc);
+ }
+
+ #ifdef CONFIG_COMPAT
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -12,6 +12,7 @@
+ #include <linux/string.h>
+ #include <linux/mm.h>
+ #include <linux/smp.h>
++#include <linux/slab.h>
+ #include <linux/vmalloc.h>
+ #include <linux/uaccess.h>
+
+@@ -20,82 +21,82 @@
+ #include <asm/mmu_context.h>
+ #include <asm/syscalls.h>
+
+-#ifdef CONFIG_SMP
++/* context.lock is held for us, so we don't need any locking. */
+ static void flush_ldt(void *current_mm)
+ {
+- if (current->active_mm == current_mm)
+- load_LDT(&current->active_mm->context);
++ mm_context_t *pc;
++
++ if (current->active_mm != current_mm)
++ return;
++
++ pc = &current->active_mm->context;
++ set_ldt(pc->ldt->entries, pc->ldt->size);
+ }
+-#endif
+
+-static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
++/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
++static struct ldt_struct *alloc_ldt_struct(int size)
+ {
+- void *oldldt, *newldt;
+- int oldsize;
++ struct ldt_struct *new_ldt;
++ int alloc_size;
+
+- if (mincount <= pc->size)
+- return 0;
+- oldsize = pc->size;
+- mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
+- (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
+- if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
+- newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
+- else
+- newldt = (void *)__get_free_page(GFP_KERNEL);
++ if (size > LDT_ENTRIES)
++ return NULL;
+
+- if (!newldt)
+- return -ENOMEM;
++ new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
++ if (!new_ldt)
++ return NULL;
++
++ BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
++ alloc_size = size * LDT_ENTRY_SIZE;
++
++ /*
++ * Xen is very picky: it requires a page-aligned LDT that has no
++ * trailing nonzero bytes in any page that contains LDT descriptors.
++ * Keep it simple: zero the whole allocation and never allocate less
++ * than PAGE_SIZE.
++ */
++ if (alloc_size > PAGE_SIZE)
++ new_ldt->entries = vzalloc(alloc_size);
++ else
++ new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL);
+
+- if (oldsize)
+- memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
+- oldldt = pc->ldt;
+- memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
+- (mincount - oldsize) * LDT_ENTRY_SIZE);
+-
+- paravirt_alloc_ldt(newldt, mincount);
+-
+-#ifdef CONFIG_X86_64
+- /* CHECKME: Do we really need this ? */
+- wmb();
+-#endif
+- pc->ldt = newldt;
+- wmb();
+- pc->size = mincount;
+- wmb();
+-
+- if (reload) {
+-#ifdef CONFIG_SMP
+- preempt_disable();
+- load_LDT(pc);
+- if (!cpumask_equal(mm_cpumask(current->mm),
+- cpumask_of(smp_processor_id())))
+- smp_call_function(flush_ldt, current->mm, 1);
+- preempt_enable();
+-#else
+- load_LDT(pc);
+-#endif
+- }
+- if (oldsize) {
+- paravirt_free_ldt(oldldt, oldsize);
+- if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
+- vfree(oldldt);
+- else
+- put_page(virt_to_page(oldldt));
++ if (!new_ldt->entries) {
++ kfree(new_ldt);
++ return NULL;
+ }
+- return 0;
++
++ new_ldt->size = size;
++ return new_ldt;
+ }
+
+-static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
++/* After calling this, the LDT is immutable. */
++static void finalize_ldt_struct(struct ldt_struct *ldt)
+ {
+- int err = alloc_ldt(new, old->size, 0);
+- int i;
++ paravirt_alloc_ldt(ldt->entries, ldt->size);
++}
++
++/* context.lock is held */
++static void install_ldt(struct mm_struct *current_mm,
++ struct ldt_struct *ldt)
++{
++ /* Synchronizes with lockless_dereference in load_mm_ldt. */
++ smp_store_release(&current_mm->context.ldt, ldt);
++
++ /* Activate the LDT for all CPUs using current_mm. */
++ on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
++}
+
+- if (err < 0)
+- return err;
++static void free_ldt_struct(struct ldt_struct *ldt)
++{
++ if (likely(!ldt))
++ return;
+
+- for (i = 0; i < old->size; i++)
+- write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
+- return 0;
++ paravirt_free_ldt(ldt->entries, ldt->size);
++ if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
++ vfree(ldt->entries);
++ else
++ kfree(ldt->entries);
++ kfree(ldt);
+ }
+
+ /*
+@@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t
+ */
+ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+ {
++ struct ldt_struct *new_ldt;
+ struct mm_struct *old_mm;
+ int retval = 0;
+
+ mutex_init(&mm->context.lock);
+- mm->context.size = 0;
+ old_mm = current->mm;
+- if (old_mm && old_mm->context.size > 0) {
+- mutex_lock(&old_mm->context.lock);
+- retval = copy_ldt(&mm->context, &old_mm->context);
+- mutex_unlock(&old_mm->context.lock);
++ if (!old_mm) {
++ mm->context.ldt = NULL;
++ return 0;
+ }
++
++ mutex_lock(&old_mm->context.lock);
++ if (!old_mm->context.ldt) {
++ mm->context.ldt = NULL;
++ goto out_unlock;
++ }
++
++ new_ldt = alloc_ldt_struct(old_mm->context.ldt->size);
++ if (!new_ldt) {
++ retval = -ENOMEM;
++ goto out_unlock;
++ }
++
++ memcpy(new_ldt->entries, old_mm->context.ldt->entries,
++ new_ldt->size * LDT_ENTRY_SIZE);
++ finalize_ldt_struct(new_ldt);
++
++ mm->context.ldt = new_ldt;
++
++out_unlock:
++ mutex_unlock(&old_mm->context.lock);
+ return retval;
+ }
+
+@@ -125,53 +146,47 @@ int init_new_context(struct task_struct
+ */
+ void destroy_context(struct mm_struct *mm)
+ {
+- if (mm->context.size) {
+-#ifdef CONFIG_X86_32
+- /* CHECKME: Can this ever happen ? */
+- if (mm == current->active_mm)
+- clear_LDT();
+-#endif
+- paravirt_free_ldt(mm->context.ldt, mm->context.size);
+- if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
+- vfree(mm->context.ldt);
+- else
+- put_page(virt_to_page(mm->context.ldt));
+- mm->context.size = 0;
+- }
++ free_ldt_struct(mm->context.ldt);
++ mm->context.ldt = NULL;
+ }
+
+ static int read_ldt(void __user *ptr, unsigned long bytecount)
+ {
+- int err;
++ int retval;
+ unsigned long size;
+ struct mm_struct *mm = current->mm;
+
+- if (!mm->context.size)
+- return 0;
++ mutex_lock(&mm->context.lock);
++
++ if (!mm->context.ldt) {
++ retval = 0;
++ goto out_unlock;
++ }
++
+ if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
+ bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
+
+- mutex_lock(&mm->context.lock);
+- size = mm->context.size * LDT_ENTRY_SIZE;
++ size = mm->context.ldt->size * LDT_ENTRY_SIZE;
+ if (size > bytecount)
+ size = bytecount;
+
+- err = 0;
+- if (copy_to_user(ptr, mm->context.ldt, size))
+- err = -EFAULT;
+- mutex_unlock(&mm->context.lock);
+- if (err < 0)
+- goto error_return;
++ if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
++ retval = -EFAULT;
++ goto out_unlock;
++ }
++
+ if (size != bytecount) {
+- /* zero-fill the rest */
+- if (clear_user(ptr + size, bytecount - size) != 0) {
+- err = -EFAULT;
+- goto error_return;
++ /* Zero-fill the rest and pretend we read bytecount bytes. */
++ if (clear_user(ptr + size, bytecount - size)) {
++ retval = -EFAULT;
++ goto out_unlock;
+ }
+ }
+- return bytecount;
+-error_return:
+- return err;
++ retval = bytecount;
++
++out_unlock:
++ mutex_unlock(&mm->context.lock);
++ return retval;
+ }
+
+ static int read_default_ldt(void __user *ptr, unsigned long bytecount)
+@@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, u
+ struct desc_struct ldt;
+ int error;
+ struct user_desc ldt_info;
++ int oldsize, newsize;
++ struct ldt_struct *new_ldt, *old_ldt;
+
+ error = -EINVAL;
+ if (bytecount != sizeof(ldt_info))
+@@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, u
+ goto out;
+ }
+
+- mutex_lock(&mm->context.lock);
+- if (ldt_info.entry_number >= mm->context.size) {
+- error = alloc_ldt(¤t->mm->context,
+- ldt_info.entry_number + 1, 1);
+- if (error < 0)
+- goto out_unlock;
+- }
+-
+- /* Allow LDTs to be cleared by the user. */
+- if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+- if (oldmode || LDT_empty(&ldt_info)) {
+- memset(&ldt, 0, sizeof(ldt));
+- goto install;
++ if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
++ LDT_empty(&ldt_info)) {
++ /* The user wants to clear the entry. */
++ memset(&ldt, 0, sizeof(ldt));
++ } else {
++ if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
++ error = -EINVAL;
++ goto out;
+ }
++
++ fill_ldt(&ldt, &ldt_info);
++ if (oldmode)
++ ldt.avl = 0;
+ }
+
+- if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
+- error = -EINVAL;
++ mutex_lock(&mm->context.lock);
++
++ old_ldt = mm->context.ldt;
++ oldsize = old_ldt ? old_ldt->size : 0;
++ newsize = max((int)(ldt_info.entry_number + 1), oldsize);
++
++ error = -ENOMEM;
++ new_ldt = alloc_ldt_struct(newsize);
++ if (!new_ldt)
+ goto out_unlock;
+- }
+
+- fill_ldt(&ldt, &ldt_info);
+- if (oldmode)
+- ldt.avl = 0;
+-
+- /* Install the new entry ... */
+-install:
+- write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt);
++ if (old_ldt)
++ memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE);
++ new_ldt->entries[ldt_info.entry_number] = ldt;
++ finalize_ldt_struct(new_ldt);
++
++ install_ldt(mm, new_ldt);
++ free_ldt_struct(old_ldt);
+ error = 0;
+
+ out_unlock:
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -122,11 +122,11 @@ void __show_regs(struct pt_regs *regs, i
+ void release_thread(struct task_struct *dead_task)
+ {
+ if (dead_task->mm) {
+- if (dead_task->mm->context.size) {
++ if (dead_task->mm->context.ldt) {
+ pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
+ dead_task->comm,
+ dead_task->mm->context.ldt,
+- dead_task->mm->context.size);
++ dead_task->mm->context.ldt->size);
+ BUG();
+ }
+ }
+--- a/arch/x86/kernel/step.c
++++ b/arch/x86/kernel/step.c
+@@ -5,6 +5,7 @@
+ #include <linux/mm.h>
+ #include <linux/ptrace.h>
+ #include <asm/desc.h>
++#include <asm/mmu_context.h>
+
+ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
+ {
+@@ -30,10 +31,11 @@ unsigned long convert_ip_to_linear(struc
+ seg &= ~7UL;
+
+ mutex_lock(&child->mm->context.lock);
+- if (unlikely((seg >> 3) >= child->mm->context.size))
++ if (unlikely(!child->mm->context.ldt ||
++ (seg >> 3) >= child->mm->context.ldt->size))
+ addr = -1L; /* bogus selector, access would fault */
+ else {
+- desc = child->mm->context.ldt + seg;
++ desc = &child->mm->context.ldt->entries[seg];
+ base = get_desc_base(desc);
+
+ /* 16-bit code segment? */
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -23,6 +23,7 @@
+ #include <asm/debugreg.h>
+ #include <asm/fpu-internal.h> /* pcntxt_mask */
+ #include <asm/cpu.h>
++#include <asm/mmu_context.h>
+
+ #ifdef CONFIG_X86_32
+ __visible unsigned long saved_context_ebx;
+@@ -154,7 +155,7 @@ static void fix_processor_context(void)
+ syscall_init(); /* This sets MSR_*STAR and related */
+ #endif
+ load_TR_desc(); /* This does ltr */
+- load_LDT(&current->active_mm->context); /* This does lldt */
++ load_mm_ldt(current->active_mm); /* This does lldt */
+ }
+
+ /**