From 0dce7c6c658b4ad0924023073fd35ac121a895cc Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Wed, 17 Dec 2014 19:22:49 -0800
Subject: [PATCH] 3.18-stable patches

added patches:
	isofs-fix-infinite-looping-over-ce-entries.patch
	x86-tls-disallow-unusual-tls-segments.patch
	x86-tls-validate-tls-entries-to-protect-espfix.patch
	x86_64-switch_to-load-tls-descriptors-before-switching-ds-and-es.patch
---
 ...fix-infinite-looping-over-ce-entries.patch |  55 ++++
 ...86-tls-disallow-unusual-tls-segments.patch |  66 ++++
 ...lidate-tls-entries-to-protect-espfix.patch |  77 +++++
 ...scriptors-before-switching-ds-and-es.patch | 309 ++++++++++++++++++
 4 files changed, 507 insertions(+)
 create mode 100644 queue-3.18/isofs-fix-infinite-looping-over-ce-entries.patch
 create mode 100644 queue-3.18/x86-tls-disallow-unusual-tls-segments.patch
 create mode 100644 queue-3.18/x86-tls-validate-tls-entries-to-protect-espfix.patch
 create mode 100644 queue-3.18/x86_64-switch_to-load-tls-descriptors-before-switching-ds-and-es.patch

diff --git a/queue-3.18/isofs-fix-infinite-looping-over-ce-entries.patch b/queue-3.18/isofs-fix-infinite-looping-over-ce-entries.patch
new file mode 100644
index 00000000000..69a140ea063
--- /dev/null
+++ b/queue-3.18/isofs-fix-infinite-looping-over-ce-entries.patch
@@ -0,0 +1,55 @@
+From f54e18f1b831c92f6512d2eedb224cd63d607d3d Mon Sep 17 00:00:00 2001
+From: Jan Kara
+Date: Mon, 15 Dec 2014 14:22:46 +0100
+Subject: isofs: Fix infinite looping over CE entries
+
+From: Jan Kara
+
+commit f54e18f1b831c92f6512d2eedb224cd63d607d3d upstream.
+
+Rock Ridge extensions define so-called Continuation Entries (CE),
+which describe where further space with Rock Ridge data is located.
+A corrupted isofs image can contain an arbitrarily long chain of
+these, including one that forms a loop, causing the kernel to loop
+forever when traversing the entries.
+
+Limit the traversal to 32 entries, which should be more than enough
+space to store all the Rock Ridge data.
+
+Reported-by: P J P
+Signed-off-by: Jan Kara
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/isofs/rock.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/isofs/rock.c
++++ b/fs/isofs/rock.c
+@@ -30,6 +30,7 @@ struct rock_state {
+ 	int cont_size;
+ 	int cont_extent;
+ 	int cont_offset;
++	int cont_loops;
+ 	struct inode *inode;
+ };
+ 
+@@ -73,6 +74,9 @@ static void init_rock_state(struct rock_
+ 	rs->inode = inode;
+ }
+ 
++/* Maximum number of Rock Ridge continuation entries */
++#define RR_MAX_CE_ENTRIES 32
++
+ /*
+  * Returns 0 if the caller should continue scanning, 1 if the scan must end
+  * and -ve on error.
+@@ -105,6 +109,8 @@ static int rock_continue(struct rock_sta
+ 			goto out;
+ 		}
+ 		ret = -EIO;
++		if (++rs->cont_loops >= RR_MAX_CE_ENTRIES)
++			goto out;
+ 		bh = sb_bread(rs->inode->i_sb, rs->cont_extent);
+ 		if (bh) {
+ 			memcpy(rs->buffer, bh->b_data + rs->cont_offset,
diff --git a/queue-3.18/x86-tls-disallow-unusual-tls-segments.patch b/queue-3.18/x86-tls-disallow-unusual-tls-segments.patch
new file mode 100644
index 00000000000..39835afcb61
--- /dev/null
+++ b/queue-3.18/x86-tls-disallow-unusual-tls-segments.patch
@@ -0,0 +1,66 @@
+From 0e58af4e1d2166e9e33375a0f121e4867010d4f8 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski
+Date: Thu, 4 Dec 2014 16:48:17 -0800
+Subject: x86/tls: Disallow unusual TLS segments
+
+From: Andy Lutomirski
+
+commit 0e58af4e1d2166e9e33375a0f121e4867010d4f8 upstream.
+
+Users have no business installing custom code segments into the
+GDT, and segments that are not present but are otherwise valid
+are a historical source of interesting attacks.
+
+For completeness, block attempts to set the L bit.  (Prior to
+this patch, the L bit would have been silently dropped.)
+
+This is an ABI break.  I've checked glibc, musl, and Wine, and
+none of them look like they'll have any trouble.
+
+Note to stable maintainers: this is a hardening patch that fixes
+no known bugs.  Given the possibility of ABI issues, this
+probably shouldn't be backported quickly.
+
+Signed-off-by: Andy Lutomirski
+Acked-by: H. Peter Anvin
+Cc: Konrad Rzeszutek Wilk
+Cc: Linus Torvalds
+Cc: Willy Tarreau
+Signed-off-by: Ingo Molnar
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kernel/tls.c | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+--- a/arch/x86/kernel/tls.c
++++ b/arch/x86/kernel/tls.c
+@@ -39,6 +39,28 @@ static bool tls_desc_okay(const struct u
+ 	if (!info->seg_32bit)
+ 		return false;
+ 
++	/* Only allow data segments in the TLS array. */
++	if (info->contents > 1)
++		return false;
++
++	/*
++	 * Non-present segments with DPL 3 present an interesting attack
++	 * surface.  The kernel should handle such segments correctly,
++	 * but TLS is very difficult to protect in a sandbox, so prevent
++	 * such segments from being created.
++	 *
++	 * If userspace needs to remove a TLS entry, it can still delete
++	 * it outright.
++	 */
++	if (info->seg_not_present)
++		return false;
++
++#ifdef CONFIG_X86_64
++	/* The L bit makes no sense for data. */
++	if (info->lm)
++		return false;
++#endif
++
+ 	return true;
+ }
+ 
diff --git a/queue-3.18/x86-tls-validate-tls-entries-to-protect-espfix.patch b/queue-3.18/x86-tls-validate-tls-entries-to-protect-espfix.patch
new file mode 100644
index 00000000000..72d352908c9
--- /dev/null
+++ b/queue-3.18/x86-tls-validate-tls-entries-to-protect-espfix.patch
@@ -0,0 +1,77 @@
+From 41bdc78544b8a93a9c6814b8bbbfef966272abbe Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski
+Date: Thu, 4 Dec 2014 16:48:16 -0800
+Subject: x86/tls: Validate TLS entries to protect espfix
+
+From: Andy Lutomirski
+
+commit 41bdc78544b8a93a9c6814b8bbbfef966272abbe upstream.
+
+Installing a 16-bit RW data segment into the GDT defeats espfix.
+AFAICT this will not affect glibc, Wine, or dosemu at all.
+
+Signed-off-by: Andy Lutomirski
+Acked-by: H. Peter Anvin
+Cc: Konrad Rzeszutek Wilk
+Cc: Linus Torvalds
+Cc: Willy Tarreau
+Signed-off-by: Ingo Molnar
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kernel/tls.c | 23 +++++++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+--- a/arch/x86/kernel/tls.c
++++ b/arch/x86/kernel/tls.c
+@@ -27,6 +27,21 @@ static int get_free_idx(void)
+ 	return -ESRCH;
+ }
+ 
++static bool tls_desc_okay(const struct user_desc *info)
++{
++	if (LDT_empty(info))
++		return true;
++
++	/*
++	 * espfix is required for 16-bit data segments, but espfix
++	 * only works for LDT segments.
++	 */
++	if (!info->seg_32bit)
++		return false;
++
++	return true;
++}
++
+ static void set_tls_desc(struct task_struct *p, int idx,
+ 			 const struct user_desc *info, int n)
+ {
+@@ -66,6 +81,9 @@ int do_set_thread_area(struct task_struc
+ 	if (copy_from_user(&info, u_info, sizeof(info)))
+ 		return -EFAULT;
+ 
++	if (!tls_desc_okay(&info))
++		return -EINVAL;
++
+ 	if (idx == -1)
+ 		idx = info.entry_number;
+ 
+@@ -192,6 +210,7 @@ int regset_tls_set(struct task_struct *t
+ {
+ 	struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
+ 	const struct user_desc *info;
++	int i;
+ 
+ 	if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+ 	    (pos % sizeof(struct user_desc)) != 0 ||
+@@ -205,6 +224,10 @@ int regset_tls_set(struct task_struct *t
+ 	else
+ 		info = infobuf;
+ 
++	for (i = 0; i < count / sizeof(struct user_desc); i++)
++		if (!tls_desc_okay(info + i))
++			return -EINVAL;
++
+ 	set_tls_desc(target,
+ 		     GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)),
+ 		     info, count / sizeof(struct user_desc));
diff --git a/queue-3.18/x86_64-switch_to-load-tls-descriptors-before-switching-ds-and-es.patch b/queue-3.18/x86_64-switch_to-load-tls-descriptors-before-switching-ds-and-es.patch
new file mode 100644
index 00000000000..a492b593b65
--- /dev/null
+++ b/queue-3.18/x86_64-switch_to-load-tls-descriptors-before-switching-ds-and-es.patch
@@ -0,0 +1,309 @@
+From f647d7c155f069c1a068030255c300663516420e Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski
+Date: Mon, 8 Dec 2014 13:55:20 -0800
+Subject: x86_64, switch_to(): Load TLS descriptors before switching DS and ES
+
+From: Andy Lutomirski
+
+commit f647d7c155f069c1a068030255c300663516420e upstream.
+
+Otherwise, if buggy user code points DS or ES into the TLS
+array, they would be corrupted after a context switch.
+
+This also significantly improves the comments and documents some
+gotchas in the code.
+
+Before this patch, both tests below failed.  With this
+patch, the es test passes, although the gsbase test still fails.
+
+ ----- begin es test -----
+
+/*
+ * Copyright (c) 2014 Andy Lutomirski
+ * GPL v2
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <asm/ldt.h>
+#include <sys/syscall.h>
+
+static unsigned short GDT3(int idx)
+{
+	return (idx << 3) | 3;
+}
+
+static int create_tls(int idx, unsigned int base)
+{
+	struct user_desc desc = {
+		.entry_number = idx,
+		.base_addr = base,
+		.limit = 0xfffff,
+		.seg_32bit = 1,
+		.contents = 0, /* Data, grow-up */
+		.read_exec_only = 0,
+		.limit_in_pages = 1,
+		.seg_not_present = 0,
+		.useable = 0,
+	};
+
+	if (syscall(SYS_set_thread_area, &desc) != 0)
+		err(1, "set_thread_area");
+
+	return desc.entry_number;
+}
+
+int main()
+{
+	int idx = create_tls(-1, 0);
+	printf("Allocated GDT index %d\n", idx);
+
+	unsigned short orig_es;
+	asm volatile ("mov %%es,%0" : "=rm" (orig_es));
+
+	int errors = 0;
+	int total = 1000;
+	for (int i = 0; i < total; i++) {
+		asm volatile ("mov %0,%%es" : : "rm" (GDT3(idx)));
+		usleep(100);
+
+		unsigned short es;
+		asm volatile ("mov %%es,%0" : "=rm" (es));
+		asm volatile ("mov %0,%%es" : : "rm" (orig_es));
+		if (es != GDT3(idx)) {
+			if (errors == 0)
+				printf("[FAIL]\tES changed from 0x%hx to 0x%hx\n",
+				       GDT3(idx), es);
+			errors++;
+		}
+	}
+
+	if (errors) {
+		printf("[FAIL]\tES was corrupted %d/%d times\n", errors, total);
+		return 1;
+	} else {
+		printf("[OK]\tES was preserved\n");
+		return 0;
+	}
+}
+
+ ----- end es test -----
+
+ ----- begin gsbase test -----
+
+/*
+ * gsbase.c, a gsbase test
+ * Copyright (c) 2014 Andy Lutomirski
+ * GPL v2
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <asm/prctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+static unsigned char *testptr, *testptr2;
+
+static unsigned char read_gs_testvals(void)
+{
+	unsigned char ret;
+	asm volatile ("movb %%gs:%1, %0" : "=r" (ret) : "m" (*testptr));
+	return ret;
+}
+
+int main()
+{
+	int errors = 0;
+
+	testptr = mmap((void *)0x200000000UL, 1, PROT_READ | PROT_WRITE,
+		       MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
+	if (testptr == MAP_FAILED)
+		err(1, "mmap");
+
+	testptr2 = mmap((void *)0x300000000UL, 1, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
+	if (testptr2 == MAP_FAILED)
+		err(1, "mmap");
+
+	*testptr = 0;
+	*testptr2 = 1;
+
+	if (syscall(SYS_arch_prctl, ARCH_SET_GS,
+		    (unsigned long)testptr2 - (unsigned long)testptr) != 0)
+		err(1, "ARCH_SET_GS");
+
+	usleep(100);
+
+	if (read_gs_testvals() == 1) {
+		printf("[OK]\tARCH_SET_GS worked\n");
+	} else {
+		printf("[FAIL]\tARCH_SET_GS failed\n");
+		errors++;
+	}
+
+	asm volatile ("mov %0,%%gs" : : "r" (0));
+
+	if (read_gs_testvals() == 0) {
+		printf("[OK]\tWriting 0 to gs worked\n");
+	} else {
+		printf("[FAIL]\tWriting 0 to gs failed\n");
+		errors++;
+	}
+
+	usleep(100);
+
+	if (read_gs_testvals() == 0) {
+		printf("[OK]\tgsbase is still zero\n");
+	} else {
+		printf("[FAIL]\tgsbase was corrupted\n");
+		errors++;
+	}
+
+	return errors == 0 ? 0 : 1;
+}
+
+ ----- end gsbase test -----
+
+Signed-off-by: Andy Lutomirski
+Cc: Andi Kleen
+Cc: Linus Torvalds
+Link: http://lkml.kernel.org/r/509d27c9fec78217691c3dad91cec87e1006b34a.1418075657.git.luto@amacapital.net
+Signed-off-by: Ingo Molnar
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kernel/process_64.c | 101 +++++++++++++++++++++++++++++++------------
+ 1 file changed, 73 insertions(+), 28 deletions(-)
+
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -283,24 +283,9 @@ __switch_to(struct task_struct *prev_p,
+ 
+ 	fpu = switch_fpu_prepare(prev_p, next_p, cpu);
+ 
+-	/*
+-	 * Reload esp0, LDT and the page table pointer:
+-	 */
++	/* Reload esp0 and ss1. */
+ 	load_sp0(tss, next);
+ 
+-	/*
+-	 * Switch DS and ES.
+-	 * This won't pick up thread selector changes, but I guess that is ok.
+-	 */
+-	savesegment(es, prev->es);
+-	if (unlikely(next->es | prev->es))
+-		loadsegment(es, next->es);
+-
+-	savesegment(ds, prev->ds);
+-	if (unlikely(next->ds | prev->ds))
+-		loadsegment(ds, next->ds);
+-
+-
+ 	/* We must save %fs and %gs before load_TLS() because
+ 	 * %fs and %gs may be cleared by load_TLS().
+ 	 *
+@@ -309,41 +294,101 @@ __switch_to(struct task_struct *prev_p,
+ 	savesegment(fs, fsindex);
+ 	savesegment(gs, gsindex);
+ 
++	/*
++	 * Load TLS before restoring any segments so that segment loads
++	 * reference the correct GDT entries.
++	 */
+ 	load_TLS(next, cpu);
+ 
+ 	/*
+-	 * Leave lazy mode, flushing any hypercalls made here.
+-	 * This must be done before restoring TLS segments so
+-	 * the GDT and LDT are properly updated, and must be
+-	 * done before math_state_restore, so the TS bit is up
+-	 * to date.
++	 * Leave lazy mode, flushing any hypercalls made here.  This
++	 * must be done after loading TLS entries in the GDT but before
++	 * loading segments that might reference them, and it must
++	 * be done before math_state_restore, so the TS bit is up to
++	 * date.
+ 	 */
+ 	arch_end_context_switch(next_p);
+ 
++	/* Switch DS and ES.
++	 *
++	 * Reading them only returns the selectors, but writing them (if
++	 * nonzero) loads the full descriptor from the GDT or LDT.  The
++	 * LDT for next is loaded in switch_mm, and the GDT is loaded
++	 * above.
++	 *
++	 * We therefore need to write new values to the segment
++	 * registers on every context switch unless both the new and old
++	 * values are zero.
++	 *
++	 * Note that we don't need to do anything for CS and SS, as
++	 * those are saved and restored as part of pt_regs.
++	 */
++	savesegment(es, prev->es);
++	if (unlikely(next->es | prev->es))
++		loadsegment(es, next->es);
++
++	savesegment(ds, prev->ds);
++	if (unlikely(next->ds | prev->ds))
++		loadsegment(ds, next->ds);
++
+ 	/*
+ 	 * Switch FS and GS.
+ 	 *
+-	 * Segment register != 0 always requires a reload.  Also
+-	 * reload when it has changed.  When prev process used 64bit
+-	 * base always reload to avoid an information leak.
++	 * These are even more complicated than DS and ES: they have
++	 * 64-bit bases that are controlled by arch_prctl.  Those bases
++	 * only differ from the values in the GDT or LDT if the selector
++	 * is 0.
++	 *
++	 * Loading the segment register resets the hidden base part of
++	 * the register to 0 or the value from the GDT / LDT.  If the
++	 * next base address is zero, writing 0 to the segment register
++	 * is much faster than using wrmsr to explicitly zero the base.
++	 *
++	 * The thread_struct.fs and thread_struct.gs values are 0
++	 * if the fs and gs bases respectively are not overridden
++	 * from the values implied by fsindex and gsindex.  They
++	 * are nonzero, and store the nonzero base addresses, if
++	 * the bases are overridden.
++	 *
++	 * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should
++	 * be impossible.
++	 *
++	 * Therefore we need to reload the segment registers if either
++	 * the old or new selector is nonzero, and we need to override
++	 * the base address if the next thread expects it to be
++	 * overridden.
++	 *
++	 * This code is unnecessarily slow in the case where the old and
++	 * new indexes are zero and the new base is nonzero -- it will
++	 * unnecessarily write 0 to the selector before writing the new
++	 * base address.
++	 *
++	 * Note: This all depends on arch_prctl being the only way that
++	 * user code can override the segment base.  Once wrfsbase and
++	 * wrgsbase are enabled, most of this code will need to change.
+ 	 */
+ 	if (unlikely(fsindex | next->fsindex | prev->fs)) {
+ 		loadsegment(fs, next->fsindex);
++
+ 		/*
+-		 * Check if the user used a selector != 0; if yes
+-		 * clear 64bit base, since overloaded base is always
+-		 * mapped to the Null selector
++		 * If user code wrote a nonzero value to FS, then it also
++		 * cleared the overridden base address.
++		 *
++		 * XXX: if user code wrote 0 to FS and cleared the base
++		 * address itself, we won't notice and we'll incorrectly
++		 * restore the prior base address next time we reschedule
++		 * the process.
+ 		 */
+ 		if (fsindex)
+ 			prev->fs = 0;
+ 	}
+-	/* when next process has a 64bit base use it */
+ 	if (next->fs)
+ 		wrmsrl(MSR_FS_BASE, next->fs);
+ 	prev->fsindex = fsindex;
+ 
+ 	if (unlikely(gsindex | next->gsindex | prev->gs)) {
+ 		load_gs_index(next->gsindex);
++
++		/* This works (and fails) the same way as fsindex above. */
+ 		if (gsindex)
+ 			prev->gs = 0;
+ 	}
--
2.47.3
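
A note for reviewers of this queue: the two x86/tls patches above change what
set_thread_area() accepts, and that is easy to probe from userspace.  The
sketch below is illustrative only -- it is not part of the queued patches, and
the file name and output strings are invented here, in the style of the tests
quoted in the switch_to() changelog.  On a kernel with both patches applied,
each call should fail with -EINVAL (from the new tls_desc_okay() checks); on
an unpatched 3.18 kernel both calls succeed.

 ----- begin tls-validation demo (illustrative, not part of the queue) -----

/*
 * tls_checks.c, a hypothetical probe for the x86/tls hardening patches.
 * Not taken from the upstream commits.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <asm/ldt.h>
#include <sys/syscall.h>

static int try_tls(struct user_desc *desc, const char *what)
{
	desc->entry_number = -1;	/* ask the kernel for a free slot */
	if (syscall(SYS_set_thread_area, desc) == 0) {
		printf("[OLD]\t%s accepted (pre-patch behavior)\n", what);
		return 1;
	}
	printf("[NEW]\t%s rejected: %s\n", what, strerror(errno));
	return 0;
}

int main(void)
{
	/* 16-bit RW data segment: installing this in the GDT defeats espfix. */
	struct user_desc bit16 = {
		.base_addr = 0,
		.limit = 0xffff,
		.seg_32bit = 0,
		.contents = 0, /* Data, grow-up */
		.read_exec_only = 0,
		.limit_in_pages = 0,
		.seg_not_present = 0,
		.useable = 0,
	};

	/*
	 * Otherwise-valid but not-present segment: deliberately not
	 * LDT_empty(), so it reaches the seg_not_present check instead
	 * of being treated as a deletion.
	 */
	struct user_desc notpresent = {
		.base_addr = 0,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.contents = 0,
		.read_exec_only = 0,
		.limit_in_pages = 1,
		.seg_not_present = 1,
		.useable = 0,
	};

	int accepted = 0;

	accepted += try_tls(&bit16, "16-bit data segment");
	accepted += try_tls(&notpresent, "not-present segment");
	return accepted ? 1 : 0;
}

 ----- end tls-validation demo -----

The entry_number = -1 slot-allocation trick matches the es test above; the
16-bit rejection comes from the validate patch and the not-present rejection
from the disallow patch, so both probes should report [NEW] only with the
full queue applied.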