+++ /dev/null
-From 37868fe113ff2ba814b3b4eb12df214df555f8dc Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski <luto@kernel.org>
-Date: Thu, 30 Jul 2015 14:31:32 -0700
-Subject: x86/ldt: Make modify_ldt synchronous
-
-From: Andy Lutomirski <luto@kernel.org>
-
-commit 37868fe113ff2ba814b3b4eb12df214df555f8dc upstream.
-
-modify_ldt() has questionable locking and does not synchronize
-threads. Improve it: redesign the locking and synchronize all
-threads' LDTs using an IPI on all modifications.
-
-This will dramatically slow down modify_ldt in multithreaded
-programs, but there shouldn't be any multithreaded programs that
-care about modify_ldt's performance in the first place.
-
-This fixes some fallout from the CVE-2015-5157 fixes.
-
-Signed-off-by: Andy Lutomirski <luto@kernel.org>
-Reviewed-by: Borislav Petkov <bp@suse.de>
-Cc: Andrew Cooper <andrew.cooper3@citrix.com>
-Cc: Andy Lutomirski <luto@amacapital.net>
-Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
-Cc: Borislav Petkov <bp@alien8.de>
-Cc: Brian Gerst <brgerst@gmail.com>
-Cc: Denys Vlasenko <dvlasenk@redhat.com>
-Cc: H. Peter Anvin <hpa@zytor.com>
-Cc: Jan Beulich <jbeulich@suse.com>
-Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Sasha Levin <sasha.levin@oracle.com>
-Cc: Steven Rostedt <rostedt@goodmis.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: security@kernel.org <security@kernel.org>
-Cc: xen-devel <xen-devel@lists.xen.org>
-Link: http://lkml.kernel.org/r/4c6978476782160600471bd865b318db34c7b628.1438291540.git.luto@kernel.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-
----
- arch/x86/include/asm/desc.h | 15 --
- arch/x86/include/asm/mmu.h | 3
- arch/x86/include/asm/mmu_context.h | 54 ++++++-
- arch/x86/kernel/cpu/common.c | 4
- arch/x86/kernel/cpu/perf_event.c | 12 +
- arch/x86/kernel/ldt.c | 264 ++++++++++++++++++++-----------------
- arch/x86/kernel/process_64.c | 4
- arch/x86/kernel/step.c | 6
- arch/x86/power/cpu.c | 3
- 9 files changed, 211 insertions(+), 154 deletions(-)
-
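The core of the redesign, visible across the hunks below, is replacing the mm context's two separately updated fields with a single pointer to an immutable snapshot. A minimal sketch of the difference, in plain C with illustrative names (only ldt_struct itself comes from the patch):

    /* Before: two fields updated in place, ordered only by wmb().  A reader
     * on another CPU could pair the enlarged size with the stale pointer
     * and index past the end of the old allocation. */
    struct old_mm_ldt {
        void *ldt;      /* descriptor table, resized in place */
        int   size;     /* entry count */
    };

    /* After: one pointer to a snapshot that is never modified while live,
     * so any reader sees a consistent (entries, size) pair or NULL. */
    struct desc_struct;             /* opaque in this sketch */
    struct ldt_struct {
        struct desc_struct *entries;
        int size;
    };
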
---- a/arch/x86/include/asm/desc.h
-+++ b/arch/x86/include/asm/desc.h
-@@ -280,21 +280,6 @@ static inline void clear_LDT(void)
- set_ldt(NULL, 0);
- }
-
--/*
-- * load one particular LDT into the current CPU
-- */
--static inline void load_LDT_nolock(mm_context_t *pc)
--{
-- set_ldt(pc->ldt, pc->size);
--}
--
--static inline void load_LDT(mm_context_t *pc)
--{
-- preempt_disable();
-- load_LDT_nolock(pc);
-- preempt_enable();
--}
--
- static inline unsigned long get_desc_base(const struct desc_struct *desc)
- {
- return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
---- a/arch/x86/include/asm/mmu.h
-+++ b/arch/x86/include/asm/mmu.h
-@@ -9,8 +9,7 @@
- * we put the segment information here.
- */
- typedef struct {
-- void *ldt;
-- int size;
-+ struct ldt_struct *ldt;
-
- #ifdef CONFIG_X86_64
- /* True if mm supports a task running in 32 bit compatibility mode. */
---- a/arch/x86/include/asm/mmu_context.h
-+++ b/arch/x86/include/asm/mmu_context.h
-@@ -34,6 +34,50 @@ static inline void load_mm_cr4(struct mm
- #endif
-
- /*
-+ * ldt_structs can be allocated, used, and freed, but they are never
-+ * modified while live.
-+ */
-+struct ldt_struct {
-+ /*
-+ * Xen requires page-aligned LDTs with special permissions. This is
-+ * needed to prevent us from installing evil descriptors such as
-+ * call gates. On native, we could merge the ldt_struct and LDT
-+ * allocations, but it's not worth trying to optimize.
-+ */
-+ struct desc_struct *entries;
-+ int size;
-+};
-+
-+static inline void load_mm_ldt(struct mm_struct *mm)
-+{
-+ struct ldt_struct *ldt;
-+
-+ /* lockless_dereference synchronizes with smp_store_release */
-+ ldt = lockless_dereference(mm->context.ldt);
-+
-+ /*
-+ * Any change to mm->context.ldt is followed by an IPI to all
-+ * CPUs with the mm active. The LDT will not be freed until
-+ * after the IPI is handled by all such CPUs. This means that,
-+ * if the ldt_struct changes before we return, the values we see
-+ * will be safe, and the new values will be loaded before we run
-+ * any user code.
-+ *
-+ * NB: don't try to convert this to use RCU without extreme care.
-+ * We would still need IRQs off, because we don't want to change
-+ * the local LDT after an IPI loaded a newer value than the one
-+ * that we can see.
-+ */
-+
-+ if (unlikely(ldt))
-+ set_ldt(ldt->entries, ldt->size);
-+ else
-+ clear_LDT();
-+
-+ DEBUG_LOCKS_WARN_ON(preemptible());
-+}
-+
-+/*
- * Used for LDT copy/destruction.
- */
- int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
-@@ -78,12 +122,12 @@ static inline void switch_mm(struct mm_s
- * was called and then modify_ldt changed
- * prev->context.ldt but suppressed an IPI to this CPU.
- * In this case, prev->context.ldt != NULL, because we
-- * never free an LDT while the mm still exists. That
-- * means that next->context.ldt != prev->context.ldt,
-- * because mms never share an LDT.
-+ * never set context.ldt to NULL while the mm still
-+ * exists. That means that next->context.ldt !=
-+ * prev->context.ldt, because mms never share an LDT.
- */
- if (unlikely(prev->context.ldt != next->context.ldt))
-- load_LDT_nolock(&next->context);
-+ load_mm_ldt(next);
- }
- #ifdef CONFIG_SMP
- else {
-@@ -106,7 +150,7 @@ static inline void switch_mm(struct mm_s
- load_cr3(next->pgd);
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
- load_mm_cr4(next);
-- load_LDT_nolock(&next->context);
-+ load_mm_ldt(next);
- }
- }
- #endif
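
The load_mm_ldt() added above pairs lockless_dereference() on the reader side with the smp_store_release() in install_ldt() (see the ldt.c hunks below), and uses the modification IPI as the grace period before the old table may be freed. A hedged sketch of that discipline in C11 atomics — set_hw_ldt(), clear_hw_ldt() and broadcast_ipi_and_wait() are stand-ins, not kernel APIs:

    #include <stdatomic.h>
    #include <stdlib.h>

    struct desc_struct;                      /* opaque for this sketch */

    struct ldt_struct {
        struct desc_struct *entries;         /* never modified while published */
        int size;
    };

    static _Atomic(struct ldt_struct *) ctx_ldt;

    /* Stand-ins for kernel primitives; assumptions, not real APIs. */
    extern void set_hw_ldt(struct desc_struct *entries, int size);
    extern void clear_hw_ldt(void);
    extern void broadcast_ipi_and_wait(void);

    /* Reader side, cf. load_mm_ldt(): runs non-preemptible. */
    static void load_ldt_snapshot(void)
    {
        /* a consume load plays the role of lockless_dereference() */
        struct ldt_struct *ldt =
            atomic_load_explicit(&ctx_ldt, memory_order_consume);

        if (ldt)
            set_hw_ldt(ldt->entries, ldt->size);
        else
            clear_hw_ldt();
    }

    /* Writer side, cf. install_ldt(); the caller serializes writers,
     * as context.lock does in the kernel. */
    static void install_snapshot(struct ldt_struct *new_ldt)
    {
        struct ldt_struct *old =
            atomic_load_explicit(&ctx_ldt, memory_order_relaxed);

        /* a release store plays the role of smp_store_release() */
        atomic_store_explicit(&ctx_ldt, new_ldt, memory_order_release);

        /* cf. on_each_cpu_mask(..., flush_ldt, ..., true): once this
         * returns, every CPU using this mm has reloaded the LDT, so the
         * old snapshot can no longer be in use. */
        broadcast_ipi_and_wait();
        free(old);
    }
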
---- a/arch/x86/kernel/cpu/common.c
-+++ b/arch/x86/kernel/cpu/common.c
-@@ -1434,7 +1434,7 @@ void cpu_init(void)
- load_sp0(t, &current->thread);
- set_tss_desc(cpu, t);
- load_TR_desc();
-- load_LDT(&init_mm.context);
-+ load_mm_ldt(&init_mm);
-
- clear_all_debug_regs();
- dbg_restore_debug_regs();
-@@ -1483,7 +1483,7 @@ void cpu_init(void)
- load_sp0(t, thread);
- set_tss_desc(cpu, t);
- load_TR_desc();
-- load_LDT(&init_mm.context);
-+ load_mm_ldt(&init_mm);
-
- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
-
---- a/arch/x86/kernel/cpu/perf_event.c
-+++ b/arch/x86/kernel/cpu/perf_event.c
-@@ -2170,21 +2170,25 @@ static unsigned long get_segment_base(un
- int idx = segment >> 3;
-
- if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
-+ struct ldt_struct *ldt;
-+
- if (idx > LDT_ENTRIES)
- return 0;
-
-- if (idx > current->active_mm->context.size)
-+ /* IRQs are off, so this synchronizes with smp_store_release */
-+ ldt = lockless_dereference(current->active_mm->context.ldt);
-+ if (!ldt || idx > ldt->size)
- return 0;
-
-- desc = current->active_mm->context.ldt;
-+ desc = &ldt->entries[idx];
- } else {
- if (idx > GDT_ENTRIES)
- return 0;
-
-- desc = raw_cpu_ptr(gdt_page.gdt);
-+ desc = raw_cpu_ptr(gdt_page.gdt) + idx;
- }
-
-- return get_desc_base(desc + idx);
-+ return get_desc_base(desc);
- }
-
- #ifdef CONFIG_COMPAT
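
For reference, get_segment_base() decodes a raw selector: bits 0-1 are the RPL, bit 2 the table indicator, and bits 3-15 the index — hence the segment >> 3 and SEGMENT_TI_MASK tests above. Illustrative helpers following the SDM encoding (these are not part of the patch):

    #include <stdint.h>
    #include <stdbool.h>

    /* Selector layout (Intel SDM): bits 0-1 RPL, bit 2 TI, bits 3-15 index. */
    #define SEL_TI_LDT 0x4

    static inline unsigned int sel_index(uint16_t sel)  { return sel >> 3; }
    static inline bool         sel_in_ldt(uint16_t sel) { return sel & SEL_TI_LDT; }
    static inline unsigned int sel_rpl(uint16_t sel)    { return sel & 0x3; }
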
---- a/arch/x86/kernel/ldt.c
-+++ b/arch/x86/kernel/ldt.c
-@@ -12,6 +12,7 @@
- #include <linux/string.h>
- #include <linux/mm.h>
- #include <linux/smp.h>
-+#include <linux/slab.h>
- #include <linux/vmalloc.h>
- #include <linux/uaccess.h>
-
-@@ -20,82 +21,82 @@
- #include <asm/mmu_context.h>
- #include <asm/syscalls.h>
-
--#ifdef CONFIG_SMP
-+/* context.lock is held for us, so we don't need any locking. */
- static void flush_ldt(void *current_mm)
- {
-- if (current->active_mm == current_mm)
-- load_LDT(&current->active_mm->context);
-+ mm_context_t *pc;
-+
-+ if (current->active_mm != current_mm)
-+ return;
-+
-+ pc = &current->active_mm->context;
-+ set_ldt(pc->ldt->entries, pc->ldt->size);
- }
--#endif
-
--static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
-+/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
-+static struct ldt_struct *alloc_ldt_struct(int size)
- {
-- void *oldldt, *newldt;
-- int oldsize;
-+ struct ldt_struct *new_ldt;
-+ int alloc_size;
-
-- if (mincount <= pc->size)
-- return 0;
-- oldsize = pc->size;
-- mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
-- (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
-- if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
-- newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
-- else
-- newldt = (void *)__get_free_page(GFP_KERNEL);
-+ if (size > LDT_ENTRIES)
-+ return NULL;
-
-- if (!newldt)
-- return -ENOMEM;
-+ new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
-+ if (!new_ldt)
-+ return NULL;
-+
-+ BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
-+ alloc_size = size * LDT_ENTRY_SIZE;
-+
-+ /*
-+ * Xen is very picky: it requires a page-aligned LDT that has no
-+ * trailing nonzero bytes in any page that contains LDT descriptors.
-+ * Keep it simple: zero the whole allocation and never allocate less
-+ * than PAGE_SIZE.
-+ */
-+ if (alloc_size > PAGE_SIZE)
-+ new_ldt->entries = vzalloc(alloc_size);
-+ else
-+ new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL);
-
-- if (oldsize)
-- memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
-- oldldt = pc->ldt;
-- memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
-- (mincount - oldsize) * LDT_ENTRY_SIZE);
--
-- paravirt_alloc_ldt(newldt, mincount);
--
--#ifdef CONFIG_X86_64
-- /* CHECKME: Do we really need this ? */
-- wmb();
--#endif
-- pc->ldt = newldt;
-- wmb();
-- pc->size = mincount;
-- wmb();
--
-- if (reload) {
--#ifdef CONFIG_SMP
-- preempt_disable();
-- load_LDT(pc);
-- if (!cpumask_equal(mm_cpumask(current->mm),
-- cpumask_of(smp_processor_id())))
-- smp_call_function(flush_ldt, current->mm, 1);
-- preempt_enable();
--#else
-- load_LDT(pc);
--#endif
-- }
-- if (oldsize) {
-- paravirt_free_ldt(oldldt, oldsize);
-- if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
-- vfree(oldldt);
-- else
-- put_page(virt_to_page(oldldt));
-+ if (!new_ldt->entries) {
-+ kfree(new_ldt);
-+ return NULL;
- }
-- return 0;
-+
-+ new_ldt->size = size;
-+ return new_ldt;
- }
-
--static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
-+/* After calling this, the LDT is immutable. */
-+static void finalize_ldt_struct(struct ldt_struct *ldt)
- {
-- int err = alloc_ldt(new, old->size, 0);
-- int i;
-+ paravirt_alloc_ldt(ldt->entries, ldt->size);
-+}
-+
-+/* context.lock is held */
-+static void install_ldt(struct mm_struct *current_mm,
-+ struct ldt_struct *ldt)
-+{
-+ /* Synchronizes with lockless_dereference in load_mm_ldt. */
-+ smp_store_release(&current_mm->context.ldt, ldt);
-+
-+ /* Activate the LDT for all CPUs using current_mm. */
-+ on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
-+}
-
-- if (err < 0)
-- return err;
-+static void free_ldt_struct(struct ldt_struct *ldt)
-+{
-+ if (likely(!ldt))
-+ return;
-
-- for (i = 0; i < old->size; i++)
-- write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
-- return 0;
-+ paravirt_free_ldt(ldt->entries, ldt->size);
-+ if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
-+ vfree(ldt->entries);
-+ else
-+ kfree(ldt->entries);
-+ kfree(ldt);
- }
-
- /*
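
The four helpers introduced above compose into one fixed lifecycle: allocate a private table, fill it, finalize it (after which it is immutable), publish it, then free the predecessor. A sketch of that call order using the patch's own helpers — kernel context assumed, not a standalone program:

    /* Sketch: the sequence write_ldt() (later in this file) follows. */
    static int update_ldt_snapshot(struct mm_struct *mm, int size,
                                   struct ldt_struct *old_ldt)
    {
        struct ldt_struct *new_ldt = alloc_ldt_struct(size);  /* zeroed */
        if (!new_ldt)
            return -ENOMEM;
        /* ... fill new_ldt->entries while the table is still private ... */
        finalize_ldt_struct(new_ldt);  /* paravirt hook; immutable from here */
        install_ldt(mm, new_ldt);      /* smp_store_release() + IPI broadcast */
        free_ldt_struct(old_ldt);      /* safe: no CPU can still be using it */
        return 0;
    }
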
-@@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t
- */
- int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
- {
-+ struct ldt_struct *new_ldt;
- struct mm_struct *old_mm;
- int retval = 0;
-
- mutex_init(&mm->context.lock);
-- mm->context.size = 0;
- old_mm = current->mm;
-- if (old_mm && old_mm->context.size > 0) {
-- mutex_lock(&old_mm->context.lock);
-- retval = copy_ldt(&mm->context, &old_mm->context);
-- mutex_unlock(&old_mm->context.lock);
-+ if (!old_mm) {
-+ mm->context.ldt = NULL;
-+ return 0;
- }
-+
-+ mutex_lock(&old_mm->context.lock);
-+ if (!old_mm->context.ldt) {
-+ mm->context.ldt = NULL;
-+ goto out_unlock;
-+ }
-+
-+ new_ldt = alloc_ldt_struct(old_mm->context.ldt->size);
-+ if (!new_ldt) {
-+ retval = -ENOMEM;
-+ goto out_unlock;
-+ }
-+
-+ memcpy(new_ldt->entries, old_mm->context.ldt->entries,
-+ new_ldt->size * LDT_ENTRY_SIZE);
-+ finalize_ldt_struct(new_ldt);
-+
-+ mm->context.ldt = new_ldt;
-+
-+out_unlock:
-+ mutex_unlock(&old_mm->context.lock);
- return retval;
- }
-
-@@ -125,53 +146,47 @@ int init_new_context(struct task_struct
- */
- void destroy_context(struct mm_struct *mm)
- {
-- if (mm->context.size) {
--#ifdef CONFIG_X86_32
-- /* CHECKME: Can this ever happen ? */
-- if (mm == current->active_mm)
-- clear_LDT();
--#endif
-- paravirt_free_ldt(mm->context.ldt, mm->context.size);
-- if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
-- vfree(mm->context.ldt);
-- else
-- put_page(virt_to_page(mm->context.ldt));
-- mm->context.size = 0;
-- }
-+ free_ldt_struct(mm->context.ldt);
-+ mm->context.ldt = NULL;
- }
-
- static int read_ldt(void __user *ptr, unsigned long bytecount)
- {
-- int err;
-+ int retval;
- unsigned long size;
- struct mm_struct *mm = current->mm;
-
-- if (!mm->context.size)
-- return 0;
-+ mutex_lock(&mm->context.lock);
-+
-+ if (!mm->context.ldt) {
-+ retval = 0;
-+ goto out_unlock;
-+ }
-+
- if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
- bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
-
-- mutex_lock(&mm->context.lock);
-- size = mm->context.size * LDT_ENTRY_SIZE;
-+ size = mm->context.ldt->size * LDT_ENTRY_SIZE;
- if (size > bytecount)
- size = bytecount;
-
-- err = 0;
-- if (copy_to_user(ptr, mm->context.ldt, size))
-- err = -EFAULT;
-- mutex_unlock(&mm->context.lock);
-- if (err < 0)
-- goto error_return;
-+ if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
-+ retval = -EFAULT;
-+ goto out_unlock;
-+ }
-+
- if (size != bytecount) {
-- /* zero-fill the rest */
-- if (clear_user(ptr + size, bytecount - size) != 0) {
-- err = -EFAULT;
-- goto error_return;
-+ /* Zero-fill the rest and pretend we read bytecount bytes. */
-+ if (clear_user(ptr + size, bytecount - size)) {
-+ retval = -EFAULT;
-+ goto out_unlock;
- }
- }
-- return bytecount;
--error_return:
-- return err;
-+ retval = bytecount;
-+
-+out_unlock:
-+ mutex_unlock(&mm->context.lock);
-+ return retval;
- }
-
- static int read_default_ldt(void __user *ptr, unsigned long bytecount)
-@@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, u
- struct desc_struct ldt;
- int error;
- struct user_desc ldt_info;
-+ int oldsize, newsize;
-+ struct ldt_struct *new_ldt, *old_ldt;
-
- error = -EINVAL;
- if (bytecount != sizeof(ldt_info))
-@@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, u
- goto out;
- }
-
-- mutex_lock(&mm->context.lock);
-- if (ldt_info.entry_number >= mm->context.size) {
-- error = alloc_ldt(&current->mm->context,
-- ldt_info.entry_number + 1, 1);
-- if (error < 0)
-- goto out_unlock;
-- }
--
-- /* Allow LDTs to be cleared by the user. */
-- if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
-- if (oldmode || LDT_empty(&ldt_info)) {
-- memset(&ldt, 0, sizeof(ldt));
-- goto install;
-+ if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
-+ LDT_empty(&ldt_info)) {
-+ /* The user wants to clear the entry. */
-+ memset(&ldt, 0, sizeof(ldt));
-+ } else {
-+ if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
-+ error = -EINVAL;
-+ goto out;
- }
-+
-+ fill_ldt(&ldt, &ldt_info);
-+ if (oldmode)
-+ ldt.avl = 0;
- }
-
-- if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
-- error = -EINVAL;
-+ mutex_lock(&mm->context.lock);
-+
-+ old_ldt = mm->context.ldt;
-+ oldsize = old_ldt ? old_ldt->size : 0;
-+ newsize = max((int)(ldt_info.entry_number + 1), oldsize);
-+
-+ error = -ENOMEM;
-+ new_ldt = alloc_ldt_struct(newsize);
-+ if (!new_ldt)
- goto out_unlock;
-- }
-
-- fill_ldt(&ldt, &ldt_info);
-- if (oldmode)
-- ldt.avl = 0;
--
-- /* Install the new entry ... */
--install:
-- write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt);
-+ if (old_ldt)
-+ memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE);
-+ new_ldt->entries[ldt_info.entry_number] = ldt;
-+ finalize_ldt_struct(new_ldt);
-+
-+ install_ldt(mm, new_ldt);
-+ free_ldt_struct(old_ldt);
- error = 0;
-
- out_unlock:
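
With this rewrite, every write through modify_ldt(2) allocates a fresh table, copies the old entries, publishes the new pointer, and IPIs every CPU running the mm — the slowdown the changelog accepts up front. A hedged userspace sketch of the syscall that write_ldt() backs (glibc provides no wrapper; func 0x11 selects the new-mode write):

    #include <asm/ldt.h>         /* struct user_desc */
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
        struct user_desc d;

        memset(&d, 0, sizeof(d));
        d.entry_number   = 0;        /* exercises the alloc/copy/install path */
        d.base_addr      = 0;        /* illustrative flat data segment */
        d.limit          = 0xfffff;
        d.seg_32bit      = 1;        /* else -EINVAL without CONFIG_X86_16BIT */
        d.limit_in_pages = 1;

        if (syscall(SYS_modify_ldt, 0x11, &d, sizeof(d)) != 0) {
            perror("modify_ldt");
            return 1;
        }
        puts("installed LDT entry 0");
        return 0;
    }

Calling this in a tight loop from many threads is exactly the workload the changelog predicts will get dramatically slower, since each call now broadcasts an IPI.
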
---- a/arch/x86/kernel/process_64.c
-+++ b/arch/x86/kernel/process_64.c
-@@ -122,11 +122,11 @@ void __show_regs(struct pt_regs *regs, i
- void release_thread(struct task_struct *dead_task)
- {
- if (dead_task->mm) {
-- if (dead_task->mm->context.size) {
-+ if (dead_task->mm->context.ldt) {
- pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
- dead_task->comm,
- dead_task->mm->context.ldt,
-- dead_task->mm->context.size);
-+ dead_task->mm->context.ldt->size);
- BUG();
- }
- }
---- a/arch/x86/kernel/step.c
-+++ b/arch/x86/kernel/step.c
-@@ -5,6 +5,7 @@
- #include <linux/mm.h>
- #include <linux/ptrace.h>
- #include <asm/desc.h>
-+#include <asm/mmu_context.h>
-
- unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
- {
-@@ -30,10 +31,11 @@ unsigned long convert_ip_to_linear(struc
- seg &= ~7UL;
-
- mutex_lock(&child->mm->context.lock);
-- if (unlikely((seg >> 3) >= child->mm->context.size))
-+ if (unlikely(!child->mm->context.ldt ||
-+ (seg >> 3) >= child->mm->context.ldt->size))
- addr = -1L; /* bogus selector, access would fault */
- else {
-- desc = child->mm->context.ldt + seg;
-+ desc = &child->mm->context.ldt->entries[seg];
- base = get_desc_base(desc);
-
- /* 16-bit code segment? */
---- a/arch/x86/power/cpu.c
-+++ b/arch/x86/power/cpu.c
-@@ -23,6 +23,7 @@
- #include <asm/debugreg.h>
- #include <asm/fpu-internal.h> /* pcntxt_mask */
- #include <asm/cpu.h>
-+#include <asm/mmu_context.h>
-
- #ifdef CONFIG_X86_32
- __visible unsigned long saved_context_ebx;
-@@ -154,7 +155,7 @@ static void fix_processor_context(void)
- syscall_init(); /* This sets MSR_*STAR and related */
- #endif
- load_TR_desc(); /* This does ltr */
-- load_LDT(&current->active_mm->context); /* This does lldt */
-+ load_mm_ldt(current->active_mm); /* This does lldt */
- }
-
- /**