From: Greg Kroah-Hartman Date: Fri, 14 Aug 2015 02:24:38 +0000 (-0700) Subject: added next-4.1/ X-Git-Tag: v3.10.87~19 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=462a7b8edba1b743c789a365a79ce30dc0ba4277;p=thirdparty%2Fkernel%2Fstable-queue.git added next-4.1/ --- diff --git a/next-4.1/x86-ldt-make-modify_ldt-synchronous.patch b/next-4.1/x86-ldt-make-modify_ldt-synchronous.patch new file mode 100644 index 00000000000..2729cf7e62b --- /dev/null +++ b/next-4.1/x86-ldt-make-modify_ldt-synchronous.patch @@ -0,0 +1,628 @@ +From 37868fe113ff2ba814b3b4eb12df214df555f8dc Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 30 Jul 2015 14:31:32 -0700 +Subject: x86/ldt: Make modify_ldt synchronous + +From: Andy Lutomirski + +commit 37868fe113ff2ba814b3b4eb12df214df555f8dc upstream. + +modify_ldt() has questionable locking and does not synchronize +threads. Improve it: redesign the locking and synchronize all +threads' LDTs using an IPI on all modifications. + +This will dramatically slow down modify_ldt in multithreaded +programs, but there shouldn't be any multithreaded programs that +care about modify_ldt's performance in the first place. + +This fixes some fallout from the CVE-2015-5157 fixes. + +Signed-off-by: Andy Lutomirski +Reviewed-by: Borislav Petkov +Cc: Andrew Cooper +Cc: Andy Lutomirski +Cc: Boris Ostrovsky +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Jan Beulich +Cc: Konrad Rzeszutek Wilk +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Sasha Levin +Cc: Steven Rostedt +Cc: Thomas Gleixner +Cc: security@kernel.org +Cc: xen-devel +Link: http://lkml.kernel.org/r/4c6978476782160600471bd865b318db34c7b628.1438291540.git.luto@kernel.org +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/desc.h | 15 -- + arch/x86/include/asm/mmu.h | 3 + arch/x86/include/asm/mmu_context.h | 54 ++++++- + arch/x86/kernel/cpu/common.c | 4 + arch/x86/kernel/cpu/perf_event.c | 12 + + arch/x86/kernel/ldt.c | 264 ++++++++++++++++++++----------------- + arch/x86/kernel/process_64.c | 4 + arch/x86/kernel/step.c | 6 + arch/x86/power/cpu.c | 3 + 9 files changed, 211 insertions(+), 154 deletions(-) + +--- a/arch/x86/include/asm/desc.h ++++ b/arch/x86/include/asm/desc.h +@@ -280,21 +280,6 @@ static inline void clear_LDT(void) + set_ldt(NULL, 0); + } + +-/* +- * load one particular LDT into the current CPU +- */ +-static inline void load_LDT_nolock(mm_context_t *pc) +-{ +- set_ldt(pc->ldt, pc->size); +-} +- +-static inline void load_LDT(mm_context_t *pc) +-{ +- preempt_disable(); +- load_LDT_nolock(pc); +- preempt_enable(); +-} +- + static inline unsigned long get_desc_base(const struct desc_struct *desc) + { + return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); +--- a/arch/x86/include/asm/mmu.h ++++ b/arch/x86/include/asm/mmu.h +@@ -9,8 +9,7 @@ + * we put the segment information here. + */ + typedef struct { +- void *ldt; +- int size; ++ struct ldt_struct *ldt; + + #ifdef CONFIG_X86_64 + /* True if mm supports a task running in 32 bit compatibility mode. */ +--- a/arch/x86/include/asm/mmu_context.h ++++ b/arch/x86/include/asm/mmu_context.h +@@ -34,6 +34,50 @@ static inline void load_mm_cr4(struct mm + #endif + + /* ++ * ldt_structs can be allocated, used, and freed, but they are never ++ * modified while live. ++ */ ++struct ldt_struct { ++ /* ++ * Xen requires page-aligned LDTs with special permissions. This is ++ * needed to prevent us from installing evil descriptors such as ++ * call gates. 
On native, we could merge the ldt_struct and LDT ++ * allocations, but it's not worth trying to optimize. ++ */ ++ struct desc_struct *entries; ++ int size; ++}; ++ ++static inline void load_mm_ldt(struct mm_struct *mm) ++{ ++ struct ldt_struct *ldt; ++ ++ /* lockless_dereference synchronizes with smp_store_release */ ++ ldt = lockless_dereference(mm->context.ldt); ++ ++ /* ++ * Any change to mm->context.ldt is followed by an IPI to all ++ * CPUs with the mm active. The LDT will not be freed until ++ * after the IPI is handled by all such CPUs. This means that, ++ * if the ldt_struct changes before we return, the values we see ++ * will be safe, and the new values will be loaded before we run ++ * any user code. ++ * ++ * NB: don't try to convert this to use RCU without extreme care. ++ * We would still need IRQs off, because we don't want to change ++ * the local LDT after an IPI loaded a newer value than the one ++ * that we can see. ++ */ ++ ++ if (unlikely(ldt)) ++ set_ldt(ldt->entries, ldt->size); ++ else ++ clear_LDT(); ++ ++ DEBUG_LOCKS_WARN_ON(preemptible()); ++} ++ ++/* + * Used for LDT copy/destruction. + */ + int init_new_context(struct task_struct *tsk, struct mm_struct *mm); +@@ -78,12 +122,12 @@ static inline void switch_mm(struct mm_s + * was called and then modify_ldt changed + * prev->context.ldt but suppressed an IPI to this CPU. + * In this case, prev->context.ldt != NULL, because we +- * never free an LDT while the mm still exists. That +- * means that next->context.ldt != prev->context.ldt, +- * because mms never share an LDT. ++ * never set context.ldt to NULL while the mm still ++ * exists. That means that next->context.ldt != ++ * prev->context.ldt, because mms never share an LDT. + */ + if (unlikely(prev->context.ldt != next->context.ldt)) +- load_LDT_nolock(&next->context); ++ load_mm_ldt(next); + } + #ifdef CONFIG_SMP + else { +@@ -106,7 +150,7 @@ static inline void switch_mm(struct mm_s + load_cr3(next->pgd); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); + load_mm_cr4(next); +- load_LDT_nolock(&next->context); ++ load_mm_ldt(next); + } + } + #endif +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1434,7 +1434,7 @@ void cpu_init(void) + load_sp0(t, ¤t->thread); + set_tss_desc(cpu, t); + load_TR_desc(); +- load_LDT(&init_mm.context); ++ load_mm_ldt(&init_mm); + + clear_all_debug_regs(); + dbg_restore_debug_regs(); +@@ -1483,7 +1483,7 @@ void cpu_init(void) + load_sp0(t, thread); + set_tss_desc(cpu, t); + load_TR_desc(); +- load_LDT(&init_mm.context); ++ load_mm_ldt(&init_mm); + + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + +--- a/arch/x86/kernel/cpu/perf_event.c ++++ b/arch/x86/kernel/cpu/perf_event.c +@@ -2170,21 +2170,25 @@ static unsigned long get_segment_base(un + int idx = segment >> 3; + + if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { ++ struct ldt_struct *ldt; ++ + if (idx > LDT_ENTRIES) + return 0; + +- if (idx > current->active_mm->context.size) ++ /* IRQs are off, so this synchronizes with smp_store_release */ ++ ldt = lockless_dereference(current->active_mm->context.ldt); ++ if (!ldt || idx > ldt->size) + return 0; + +- desc = current->active_mm->context.ldt; ++ desc = &ldt->entries[idx]; + } else { + if (idx > GDT_ENTRIES) + return 0; + +- desc = raw_cpu_ptr(gdt_page.gdt); ++ desc = raw_cpu_ptr(gdt_page.gdt) + idx; + } + +- return get_desc_base(desc + idx); ++ return get_desc_base(desc); + } + + #ifdef CONFIG_COMPAT +--- a/arch/x86/kernel/ldt.c ++++ b/arch/x86/kernel/ldt.c +@@ 
-12,6 +12,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -20,82 +21,82 @@ + #include + #include + +-#ifdef CONFIG_SMP ++/* context.lock is held for us, so we don't need any locking. */ + static void flush_ldt(void *current_mm) + { +- if (current->active_mm == current_mm) +- load_LDT(¤t->active_mm->context); ++ mm_context_t *pc; ++ ++ if (current->active_mm != current_mm) ++ return; ++ ++ pc = ¤t->active_mm->context; ++ set_ldt(pc->ldt->entries, pc->ldt->size); + } +-#endif + +-static int alloc_ldt(mm_context_t *pc, int mincount, int reload) ++/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */ ++static struct ldt_struct *alloc_ldt_struct(int size) + { +- void *oldldt, *newldt; +- int oldsize; ++ struct ldt_struct *new_ldt; ++ int alloc_size; + +- if (mincount <= pc->size) +- return 0; +- oldsize = pc->size; +- mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) & +- (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1)); +- if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) +- newldt = vmalloc(mincount * LDT_ENTRY_SIZE); +- else +- newldt = (void *)__get_free_page(GFP_KERNEL); ++ if (size > LDT_ENTRIES) ++ return NULL; + +- if (!newldt) +- return -ENOMEM; ++ new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL); ++ if (!new_ldt) ++ return NULL; ++ ++ BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct)); ++ alloc_size = size * LDT_ENTRY_SIZE; ++ ++ /* ++ * Xen is very picky: it requires a page-aligned LDT that has no ++ * trailing nonzero bytes in any page that contains LDT descriptors. ++ * Keep it simple: zero the whole allocation and never allocate less ++ * than PAGE_SIZE. ++ */ ++ if (alloc_size > PAGE_SIZE) ++ new_ldt->entries = vzalloc(alloc_size); ++ else ++ new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL); + +- if (oldsize) +- memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); +- oldldt = pc->ldt; +- memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, +- (mincount - oldsize) * LDT_ENTRY_SIZE); +- +- paravirt_alloc_ldt(newldt, mincount); +- +-#ifdef CONFIG_X86_64 +- /* CHECKME: Do we really need this ? */ +- wmb(); +-#endif +- pc->ldt = newldt; +- wmb(); +- pc->size = mincount; +- wmb(); +- +- if (reload) { +-#ifdef CONFIG_SMP +- preempt_disable(); +- load_LDT(pc); +- if (!cpumask_equal(mm_cpumask(current->mm), +- cpumask_of(smp_processor_id()))) +- smp_call_function(flush_ldt, current->mm, 1); +- preempt_enable(); +-#else +- load_LDT(pc); +-#endif +- } +- if (oldsize) { +- paravirt_free_ldt(oldldt, oldsize); +- if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) +- vfree(oldldt); +- else +- put_page(virt_to_page(oldldt)); ++ if (!new_ldt->entries) { ++ kfree(new_ldt); ++ return NULL; + } +- return 0; ++ ++ new_ldt->size = size; ++ return new_ldt; + } + +-static inline int copy_ldt(mm_context_t *new, mm_context_t *old) ++/* After calling this, the LDT is immutable. */ ++static void finalize_ldt_struct(struct ldt_struct *ldt) + { +- int err = alloc_ldt(new, old->size, 0); +- int i; ++ paravirt_alloc_ldt(ldt->entries, ldt->size); ++} ++ ++/* context.lock is held */ ++static void install_ldt(struct mm_struct *current_mm, ++ struct ldt_struct *ldt) ++{ ++ /* Synchronizes with lockless_dereference in load_mm_ldt. */ ++ smp_store_release(¤t_mm->context.ldt, ldt); ++ ++ /* Activate the LDT for all CPUs using current_mm. 
*/ ++ on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); ++} + +- if (err < 0) +- return err; ++static void free_ldt_struct(struct ldt_struct *ldt) ++{ ++ if (likely(!ldt)) ++ return; + +- for (i = 0; i < old->size; i++) +- write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); +- return 0; ++ paravirt_free_ldt(ldt->entries, ldt->size); ++ if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) ++ vfree(ldt->entries); ++ else ++ kfree(ldt->entries); ++ kfree(ldt); + } + + /* +@@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t + */ + int init_new_context(struct task_struct *tsk, struct mm_struct *mm) + { ++ struct ldt_struct *new_ldt; + struct mm_struct *old_mm; + int retval = 0; + + mutex_init(&mm->context.lock); +- mm->context.size = 0; + old_mm = current->mm; +- if (old_mm && old_mm->context.size > 0) { +- mutex_lock(&old_mm->context.lock); +- retval = copy_ldt(&mm->context, &old_mm->context); +- mutex_unlock(&old_mm->context.lock); ++ if (!old_mm) { ++ mm->context.ldt = NULL; ++ return 0; + } ++ ++ mutex_lock(&old_mm->context.lock); ++ if (!old_mm->context.ldt) { ++ mm->context.ldt = NULL; ++ goto out_unlock; ++ } ++ ++ new_ldt = alloc_ldt_struct(old_mm->context.ldt->size); ++ if (!new_ldt) { ++ retval = -ENOMEM; ++ goto out_unlock; ++ } ++ ++ memcpy(new_ldt->entries, old_mm->context.ldt->entries, ++ new_ldt->size * LDT_ENTRY_SIZE); ++ finalize_ldt_struct(new_ldt); ++ ++ mm->context.ldt = new_ldt; ++ ++out_unlock: ++ mutex_unlock(&old_mm->context.lock); + return retval; + } + +@@ -125,53 +146,47 @@ int init_new_context(struct task_struct + */ + void destroy_context(struct mm_struct *mm) + { +- if (mm->context.size) { +-#ifdef CONFIG_X86_32 +- /* CHECKME: Can this ever happen ? */ +- if (mm == current->active_mm) +- clear_LDT(); +-#endif +- paravirt_free_ldt(mm->context.ldt, mm->context.size); +- if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) +- vfree(mm->context.ldt); +- else +- put_page(virt_to_page(mm->context.ldt)); +- mm->context.size = 0; +- } ++ free_ldt_struct(mm->context.ldt); ++ mm->context.ldt = NULL; + } + + static int read_ldt(void __user *ptr, unsigned long bytecount) + { +- int err; ++ int retval; + unsigned long size; + struct mm_struct *mm = current->mm; + +- if (!mm->context.size) +- return 0; ++ mutex_lock(&mm->context.lock); ++ ++ if (!mm->context.ldt) { ++ retval = 0; ++ goto out_unlock; ++ } ++ + if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) + bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; + +- mutex_lock(&mm->context.lock); +- size = mm->context.size * LDT_ENTRY_SIZE; ++ size = mm->context.ldt->size * LDT_ENTRY_SIZE; + if (size > bytecount) + size = bytecount; + +- err = 0; +- if (copy_to_user(ptr, mm->context.ldt, size)) +- err = -EFAULT; +- mutex_unlock(&mm->context.lock); +- if (err < 0) +- goto error_return; ++ if (copy_to_user(ptr, mm->context.ldt->entries, size)) { ++ retval = -EFAULT; ++ goto out_unlock; ++ } ++ + if (size != bytecount) { +- /* zero-fill the rest */ +- if (clear_user(ptr + size, bytecount - size) != 0) { +- err = -EFAULT; +- goto error_return; ++ /* Zero-fill the rest and pretend we read bytecount bytes. 
*/ ++ if (clear_user(ptr + size, bytecount - size)) { ++ retval = -EFAULT; ++ goto out_unlock; + } + } +- return bytecount; +-error_return: +- return err; ++ retval = bytecount; ++ ++out_unlock: ++ mutex_unlock(&mm->context.lock); ++ return retval; + } + + static int read_default_ldt(void __user *ptr, unsigned long bytecount) +@@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, u + struct desc_struct ldt; + int error; + struct user_desc ldt_info; ++ int oldsize, newsize; ++ struct ldt_struct *new_ldt, *old_ldt; + + error = -EINVAL; + if (bytecount != sizeof(ldt_info)) +@@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, u + goto out; + } + +- mutex_lock(&mm->context.lock); +- if (ldt_info.entry_number >= mm->context.size) { +- error = alloc_ldt(¤t->mm->context, +- ldt_info.entry_number + 1, 1); +- if (error < 0) +- goto out_unlock; +- } +- +- /* Allow LDTs to be cleared by the user. */ +- if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { +- if (oldmode || LDT_empty(&ldt_info)) { +- memset(&ldt, 0, sizeof(ldt)); +- goto install; ++ if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) || ++ LDT_empty(&ldt_info)) { ++ /* The user wants to clear the entry. */ ++ memset(&ldt, 0, sizeof(ldt)); ++ } else { ++ if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { ++ error = -EINVAL; ++ goto out; + } ++ ++ fill_ldt(&ldt, &ldt_info); ++ if (oldmode) ++ ldt.avl = 0; + } + +- if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { +- error = -EINVAL; ++ mutex_lock(&mm->context.lock); ++ ++ old_ldt = mm->context.ldt; ++ oldsize = old_ldt ? old_ldt->size : 0; ++ newsize = max((int)(ldt_info.entry_number + 1), oldsize); ++ ++ error = -ENOMEM; ++ new_ldt = alloc_ldt_struct(newsize); ++ if (!new_ldt) + goto out_unlock; +- } + +- fill_ldt(&ldt, &ldt_info); +- if (oldmode) +- ldt.avl = 0; +- +- /* Install the new entry ... */ +-install: +- write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt); ++ if (old_ldt) ++ memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE); ++ new_ldt->entries[ldt_info.entry_number] = ldt; ++ finalize_ldt_struct(new_ldt); ++ ++ install_ldt(mm, new_ldt); ++ free_ldt_struct(old_ldt); + error = 0; + + out_unlock: +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -122,11 +122,11 @@ void __show_regs(struct pt_regs *regs, i + void release_thread(struct task_struct *dead_task) + { + if (dead_task->mm) { +- if (dead_task->mm->context.size) { ++ if (dead_task->mm->context.ldt) { + pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", + dead_task->comm, + dead_task->mm->context.ldt, +- dead_task->mm->context.size); ++ dead_task->mm->context.ldt->size); + BUG(); + } + } +--- a/arch/x86/kernel/step.c ++++ b/arch/x86/kernel/step.c +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + + unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) + { +@@ -30,10 +31,11 @@ unsigned long convert_ip_to_linear(struc + seg &= ~7UL; + + mutex_lock(&child->mm->context.lock); +- if (unlikely((seg >> 3) >= child->mm->context.size)) ++ if (unlikely(!child->mm->context.ldt || ++ (seg >> 3) >= child->mm->context.ldt->size)) + addr = -1L; /* bogus selector, access would fault */ + else { +- desc = child->mm->context.ldt + seg; ++ desc = &child->mm->context.ldt->entries[seg]; + base = get_desc_base(desc); + + /* 16-bit code segment? 
*/ +--- a/arch/x86/power/cpu.c ++++ b/arch/x86/power/cpu.c +@@ -23,6 +23,7 @@ + #include + #include /* pcntxt_mask */ + #include ++#include + + #ifdef CONFIG_X86_32 + __visible unsigned long saved_context_ebx; +@@ -154,7 +155,7 @@ static void fix_processor_context(void) + syscall_init(); /* This sets MSR_*STAR and related */ + #endif + load_TR_desc(); /* This does ltr */ +- load_LDT(¤t->active_mm->context); /* This does lldt */ ++ load_mm_ldt(current->active_mm); /* This does lldt */ + } + + /**
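
The heart of the new scheme above is that an ldt_struct is filled in completely, published with smp_store_release() in install_ldt(), propagated by an IPI, and read with a single lockless_dereference() in load_mm_ldt(). As a rough userspace analogy (editor's sketch only, not kernel code; "struct table", publish() and lookup() are invented names for this illustration), the same release/acquire publication pattern looks like this in C11:

/*
 * Editor's sketch, not kernel code: a userspace C11 analogue of the
 * publication pattern used by install_ldt()/load_mm_ldt() above.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct table {
	size_t size;
	int entries[16];
};

static _Atomic(struct table *) current_table;

/* Writer: build the object completely, then publish it in one store. */
static void publish(size_t size)
{
	struct table *t = calloc(1, sizeof(*t));

	if (!t)
		return;
	t->size = size;
	for (size_t i = 0; i < size; i++)
		t->entries[i] = (int)i;

	/*
	 * Pairs with the acquire load in lookup(): a reader that sees the
	 * new pointer also sees the fully initialized contents.  The kernel
	 * frees the previous table only after the IPI in install_ldt() has
	 * run everywhere; this sketch simply never frees it.
	 */
	atomic_store_explicit(&current_table, t, memory_order_release);
}

/* Reader: one load, then use the immutable snapshot without locks. */
static int lookup(size_t idx)
{
	struct table *t = atomic_load_explicit(&current_table,
					       memory_order_acquire);

	if (!t || idx >= t->size)
		return -1;
	return t->entries[idx];
}

int main(void)
{
	publish(4);
	printf("entry 2 = %d\n", lookup(2));
	return 0;
}

lockless_dereference() is weaker than an acquire load (it relies only on data-dependency ordering), but acquire is the closest portable C11 stand-in; the IPI in install_ldt() is what lets the kernel free the old LDT safely, and it has no userspace equivalent here.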
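
The user-visible interface being made synchronous is the modify_ldt() system call itself. A minimal userspace exercise of the rewritten write_ldt()/read_ldt() paths might look like the example below (editor's example, not part of the patch; it assumes an x86 Linux machine and uses syscall() directly, since glibc provides no wrapper). Function code 0x11 installs one entry in the new format, function code 0 reads the table back:

/* Editor's example, not part of the patch: exercise modify_ldt(). */
#include <asm/ldt.h>		/* struct user_desc, LDT_ENTRY_SIZE */
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct user_desc desc;
	unsigned char buf[16 * LDT_ENTRY_SIZE];
	long ret;

	/* A flat 32-bit data segment in LDT slot 0. */
	memset(&desc, 0, sizeof(desc));
	desc.entry_number   = 0;
	desc.base_addr      = 0;
	desc.limit          = 0xfffff;
	desc.seg_32bit      = 1;
	desc.limit_in_pages = 1;
	desc.useable        = 1;

	/* func 0x11: write one LDT entry ("new mode"); returns 0 on success. */
	ret = syscall(SYS_modify_ldt, 0x11, &desc, sizeof(desc));
	if (ret) {
		perror("modify_ldt(write)");
		return 1;
	}

	/* func 0: read the LDT back; returns the number of bytes copied. */
	ret = syscall(SYS_modify_ldt, 0, buf, sizeof(buf));
	printf("modify_ldt read returned %ld bytes\n", ret);
	return 0;
}

With this change applied, every other thread of the process observes the new descriptor by the time the write call returns, which is the synchronous behaviour the changelog describes.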