From: Sasha Levin
Date: Tue, 3 Sep 2019 04:56:24 +0000 (-0400)
Subject: fixes for 4.9
X-Git-Tag: v4.4.191~39
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9a73642fed72d62e9e9d1fd7c218a7a79a695a19;p=thirdparty%2Fkernel%2Fstable-queue.git

fixes for 4.9

Signed-off-by: Sasha Levin
---

diff --git a/queue-4.9/mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch b/queue-4.9/mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
new file mode 100644
index 00000000000..760c8592968
--- /dev/null
+++ b/queue-4.9/mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
@@ -0,0 +1,173 @@
+From 1d30127334c751070832ef2138717186cb765fa7 Mon Sep 17 00:00:00 2001
+From: Henry Burns
+Date: Sat, 24 Aug 2019 17:55:06 -0700
+Subject: mm/zsmalloc.c: fix race condition in zs_destroy_pool
+
+[ Upstream commit 701d678599d0c1623aaf4139c03eea260a75b027 ]
+
+In zs_destroy_pool() we call flush_work(&pool->free_work). However, we
+have no guarantee that migration isn't happening in the background at
+that time.
+
+Since migration can't directly free pages, it relies on free_work being
+scheduled to free the pages. But there's nothing preventing an
+in-progress migrate from queuing the work *after*
+zs_unregister_migration() has called flush_work(), which would leave
+pages still pointing at the inode when we free it.
+
+Since we know at destroy time all objects should be free, no new
+migrations can come in (since zs_page_isolate() fails for fully-free
+zspages). This means it is sufficient to track a "# isolated zspages"
+count by class, and have the destroy logic ensure all such pages have
+drained before proceeding. Keeping that state under the class spinlock
+keeps the logic straightforward.
+
+In this case a memory leak could lead to an eventual crash if compaction
+hits the leaked page. This crash would only occur if people are
+changing their zswap backend at runtime (which eventually starts
+destruction).
+
+Link: http://lkml.kernel.org/r/20190809181751.219326-2-henryburns@google.com
+Fixes: 48b4800a1c6a ("zsmalloc: page migration support")
+Signed-off-by: Henry Burns
+Reviewed-by: Sergey Senozhatsky
+Cc: Henry Burns
+Cc: Minchan Kim
+Cc: Shakeel Butt
+Cc: Jonathan Adams
+Cc: 
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Sasha Levin
+---
+ mm/zsmalloc.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 59 insertions(+), 2 deletions(-)
+
+diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
+index f624cc2d91d98..ad8a34bd15ca7 100644
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -52,6 +52,7 @@
+ #include <linux/zpool.h>
+ #include <linux/mount.h>
+ #include <linux/migrate.h>
++#include <linux/wait.h>
+ #include <linux/pagemap.h>
+ 
+ #define ZSPAGE_MAGIC	0x58
+@@ -265,6 +266,10 @@ struct zs_pool {
+ #ifdef CONFIG_COMPACTION
+ 	struct inode *inode;
+ 	struct work_struct free_work;
++	/* A wait queue for when migration races with async_free_zspage() */
++	wait_queue_head_t migration_wait;
++	atomic_long_t isolated_pages;
++	bool destroying;
+ #endif
+ };
+ 
+@@ -1951,6 +1956,19 @@ static void putback_zspage_deferred(struct zs_pool *pool,
+ 
+ }
+ 
++static inline void zs_pool_dec_isolated(struct zs_pool *pool)
++{
++	VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0);
++	atomic_long_dec(&pool->isolated_pages);
++	/*
++	 * There's no possibility of racing, since wait_for_isolated_drain()
++	 * checks the isolated count under &class->lock after enqueuing
++	 * on migration_wait.
++	 */
++	if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying)
++		wake_up_all(&pool->migration_wait);
++}
++
+ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
+ 				struct page *newpage, struct page *oldpage)
+ {
+@@ -2020,6 +2038,7 @@ bool zs_page_isolate(struct page *page, isolate_mode_t mode)
+ 	 */
+ 	if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
+ 		get_zspage_mapping(zspage, &class_idx, &fullness);
++		atomic_long_inc(&pool->isolated_pages);
+ 		remove_zspage(class, zspage, fullness);
+ 	}
+ 
+@@ -2108,8 +2127,16 @@ int zs_page_migrate(struct address_space *mapping, struct page *newpage,
+ 	 * Page migration is done so let's putback isolated zspage to
+ 	 * the list if @page is final isolated subpage in the zspage.
+ 	 */
+-	if (!is_zspage_isolated(zspage))
++	if (!is_zspage_isolated(zspage)) {
++		/*
++		 * We cannot race with zs_destroy_pool() here because we wait
++		 * for isolation to hit zero before we start destroying.
++		 * Also, we ensure that everyone can see pool->destroying before
++		 * we start waiting.
++		 */
+ 		putback_zspage_deferred(pool, class, zspage);
++		zs_pool_dec_isolated(pool);
++	}
+ 
+ 	reset_page(page);
+ 	put_page(page);
+@@ -2161,8 +2188,8 @@ void zs_page_putback(struct page *page)
+ 	 * so let's defer.
+ 	 */
+ 	putback_zspage_deferred(pool, class, zspage);
++	zs_pool_dec_isolated(pool);
+ 	}
+-
+ 	spin_unlock(&class->lock);
+ }
+ 
+@@ -2185,8 +2212,36 @@ static int zs_register_migration(struct zs_pool *pool)
+ 	return 0;
+ }
+ 
++static bool pool_isolated_are_drained(struct zs_pool *pool)
++{
++	return atomic_long_read(&pool->isolated_pages) == 0;
++}
++
++/* Function for resolving migration */
++static void wait_for_isolated_drain(struct zs_pool *pool)
++{
++
++	/*
++	 * We're in the process of destroying the pool, so there are no
++	 * active allocations. zs_page_isolate() fails for completely free
++	 * zspages, so we need only wait for the zs_pool's isolated
++	 * count to hit zero.
++	 */
++	wait_event(pool->migration_wait,
++		   pool_isolated_are_drained(pool));
++}
++
+ static void zs_unregister_migration(struct zs_pool *pool)
+ {
++	pool->destroying = true;
++	/*
++	 * We need a memory barrier here to ensure global visibility of
++	 * pool->destroying. Thus pool->isolated pages will either be 0 in which
++	 * case we don't care, or it will be > 0 and pool->destroying will
++	 * ensure that we wake up once isolation hits 0.
++	 */
++	smp_mb();
++	wait_for_isolated_drain(pool); /* This can block */
+ 	flush_work(&pool->free_work);
+ 	iput(pool->inode);
+ }
+@@ -2433,6 +2488,8 @@ struct zs_pool *zs_create_pool(const char *name)
+ 	if (!pool->name)
+ 		goto err;
+ 
++	init_waitqueue_head(&pool->migration_wait);
++
+ 	if (create_cache(pool))
+ 		goto err;
+ 
+-- 
+2.20.1
+
diff --git a/queue-4.9/series b/queue-4.9/series
index 8514de9d81c..4a8e52073ae 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -61,3 +61,5 @@ alsa-seq-fix-potential-concurrent-access-to-the-deleted-pool.patch
 kvm-x86-don-t-update-rip-or-do-single-step-on-faulting-emulation.patch
 x86-apic-do-not-initialize-ldr-and-dfr-for-bigsmp.patch
 x86-apic-include-the-ldr-when-clearing-out-apic-registers.patch
+uprobes-x86-fix-detection-of-32-bit-user-mode.patch
+mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
diff --git a/queue-4.9/uprobes-x86-fix-detection-of-32-bit-user-mode.patch b/queue-4.9/uprobes-x86-fix-detection-of-32-bit-user-mode.patch
new file mode 100644
index 00000000000..f0308ab8259
--- /dev/null
+++ b/queue-4.9/uprobes-x86-fix-detection-of-32-bit-user-mode.patch
@@ -0,0 +1,130 @@
+From c932a2cd326caac68cd394e7203c616214b93a67 Mon Sep 17 00:00:00 2001
+From: Sebastian Mayr
+Date: Sun, 28 Jul 2019 17:26:17 +0200
+Subject: uprobes/x86: Fix detection of 32-bit user mode
+
+[ Upstream commit 9212ec7d8357ea630031e89d0d399c761421c83b ]
+
+32-bit processes running on a 64-bit kernel are not always detected
+correctly, causing the process to crash when uretprobes are installed.
+
+The reason for the crash is that in_ia32_syscall() is used to determine the
+process's mode, which only works correctly when called from a syscall.
+
+In the case of uretprobes, however, the function is called from an exception
+and always returns 'false' on a 64-bit kernel. In consequence this leads to
+corruption of the process's return address.
+
+Fix this by using user_64bit_mode() instead of in_ia32_syscall(), which
+is correct in any situation.
+
+[ tglx: Add a comment and the following historical info ]
+
+This should have been detected by the rename which happened in commit
+
+  abfb9498ee13 ("x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()")
+
+which states in the changelog:
+
+    The is_ia32_task()/is_x32_task() function names are a big misnomer: they
+    suggests that the compat-ness of a system call is a task property, which
+    is not true, the compatness of a system call purely depends on how it
+    was invoked through the system call layer.
+    .....
+
+and then it went and blindly renamed every call site.
+
+Sadly enough this was already mentioned here:
+
+   8faaed1b9f50 ("uprobes/x86: Introduce sizeof_long(), cleanup adjust_ret_addr() and
+arch_uretprobe_hijack_return_addr()")
+
+where the changelog says:
+
+    TODO: is_ia32_task() is not what we actually want, TS_COMPAT does
+    not necessarily mean 32bit. Fortunately syscall-like insns can't be
+    probed so it actually works, but it would be better to rename and
+    use is_ia32_frame().
+
+and goes all the way back to:
+
+  0326f5a94dde ("uprobes/core: Handle breakpoint and singlestep exceptions")
+
+Oh well. 7+ years until someone actually tried a uretprobe on a 32bit
+process on a 64bit kernel....
+
+Fixes: 0326f5a94dde ("uprobes/core: Handle breakpoint and singlestep exceptions")
+Signed-off-by: Sebastian Mayr
+Signed-off-by: Thomas Gleixner
+Cc: Masami Hiramatsu
+Cc: Dmitry Safonov
+Cc: Oleg Nesterov
+Cc: Srikar Dronamraju
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20190728152617.7308-1-me@sam.st
+Signed-off-by: Sasha Levin
+---
+ arch/x86/kernel/uprobes.c | 17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
+index e78a6b1db74b0..e35466afe989d 100644
+--- a/arch/x86/kernel/uprobes.c
++++ b/arch/x86/kernel/uprobes.c
+@@ -514,9 +514,12 @@ struct uprobe_xol_ops {
+ 	void	(*abort)(struct arch_uprobe *, struct pt_regs *);
+ };
+ 
+-static inline int sizeof_long(void)
++static inline int sizeof_long(struct pt_regs *regs)
+ {
+-	return in_ia32_syscall() ? 4 : 8;
++	/*
++	 * Check registers for mode as in_xxx_syscall() does not apply here.
++	 */
++	return user_64bit_mode(regs) ? 8 : 4;
+ }
+ 
+ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+@@ -527,9 +530,9 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+ 
+ static int push_ret_address(struct pt_regs *regs, unsigned long ip)
+ {
+-	unsigned long new_sp = regs->sp - sizeof_long();
++	unsigned long new_sp = regs->sp - sizeof_long(regs);
+ 
+-	if (copy_to_user((void __user *)new_sp, &ip, sizeof_long()))
++	if (copy_to_user((void __user *)new_sp, &ip, sizeof_long(regs)))
+ 		return -EFAULT;
+ 
+ 	regs->sp = new_sp;
+@@ -562,7 +565,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
+ 		long correction = utask->vaddr - utask->xol_vaddr;
+ 		regs->ip += correction;
+ 	} else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
+-		regs->sp += sizeof_long(); /* Pop incorrect return address */
++		regs->sp += sizeof_long(regs); /* Pop incorrect return address */
+ 		if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen))
+ 			return -ERESTART;
+ 	}
+@@ -671,7 +674,7 @@ static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+ 	 * "call" insn was executed out-of-line. Just restore ->sp and restart.
+ 	 * We could also restore ->ip and try to call branch_emulate_op() again.
+ 	 */
+-	regs->sp += sizeof_long();
++	regs->sp += sizeof_long(regs);
+ 	return -ERESTART;
+ }
+ 
+@@ -962,7 +965,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+ unsigned long
+ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
+ {
+-	int rasize = sizeof_long(), nleft;
++	int rasize = sizeof_long(regs), nleft;
+ 	unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */
+ 
+ 	if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize))
+-- 
+2.20.1
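
[ Note: the pattern the zsmalloc fix implements -- count in-flight
isolations, publish a "destroying" flag with a full memory barrier, then
sleep until the count drains to zero -- is easier to see outside the
kernel. The sketch below is a hypothetical userspace rendering using C11
atomics and a pthread condvar in place of atomic_long_t and the kernel
wait queue; the names mirror the patch, but none of this is the actual
zsmalloc code. ]

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdbool.h>

	struct pool {
		atomic_long isolated_pages;	/* in-flight isolated pages */
		atomic_bool destroying;		/* set once teardown begins */
		pthread_mutex_t lock;		/* plays the role of &class->lock */
		pthread_cond_t drained;		/* plays the role of migration_wait */
	};

	/* Migration side: mirrors zs_pool_dec_isolated(). */
	static void pool_dec_isolated(struct pool *p)
	{
		/* seq_cst atomics double as the patch's smp_mb() */
		if (atomic_fetch_sub(&p->isolated_pages, 1) == 1 &&
		    atomic_load(&p->destroying)) {
			/* take the lock so the wakeup cannot be lost */
			pthread_mutex_lock(&p->lock);
			pthread_cond_broadcast(&p->drained);	/* wake_up_all() */
			pthread_mutex_unlock(&p->lock);
		}
	}

	/* Destroy side: mirrors zs_unregister_migration(). */
	static void pool_wait_for_drain(struct pool *p)
	{
		atomic_store(&p->destroying, true);	/* visible before we sleep */
		pthread_mutex_lock(&p->lock);
		while (atomic_load(&p->isolated_pages) != 0)	/* wait_event() */
			pthread_cond_wait(&p->drained, &p->lock);
		pthread_mutex_unlock(&p->lock);
	}

Because the waiter re-checks the counter under the same mutex the waker
takes before broadcasting, a decrement-to-zero cannot slip in between the
waiter's check and its sleep -- the same lost-wakeup reasoning spelled out
in the comment in zs_pool_dec_isolated() above.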