From: Greg Kroah-Hartman
Date: Mon, 24 Aug 2020 08:22:59 +0000 (+0200)
Subject: 4.19-stable patches
X-Git-Tag: v4.4.234~17
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=490c0a354f8feabbf042cc8f16dd8cf8f7d3ab11;p=thirdparty%2Fkernel%2Fstable-queue.git

4.19-stable patches

added patches:
	clk-evict-unregistered-clks-from-parent-caches.patch
	do_epoll_ctl-clean-the-failure-exits-up-a-bit.patch
	epoll-keep-a-reference-on-files-added-to-the-check-list.patch
	mm-hugetlb-fix-calculation-of-adjust_range_if_pmd_sharing_possible.patch
	xen-don-t-reschedule-in-preemption-off-sections.patch
---

diff --git a/queue-4.19/clk-evict-unregistered-clks-from-parent-caches.patch b/queue-4.19/clk-evict-unregistered-clks-from-parent-caches.patch
new file mode 100644
index 00000000000..84f0ac53a10
--- /dev/null
+++ b/queue-4.19/clk-evict-unregistered-clks-from-parent-caches.patch
@@ -0,0 +1,129 @@
+From bdcf1dc253248542537a742ae1e7ccafdd03f2d3 Mon Sep 17 00:00:00 2001
+From: Stephen Boyd
+Date: Wed, 28 Aug 2019 11:19:59 -0700
+Subject: clk: Evict unregistered clks from parent caches
+
+From: Stephen Boyd
+
+commit bdcf1dc253248542537a742ae1e7ccafdd03f2d3 upstream.
+
+We leave a dangling pointer in each clk_core::parents array that has an
+unregistered clk as a potential parent when that clk_core pointer is
+freed by clk{_hw}_unregister(). It is impossible for the true parent of
+a clk to be set with clk_set_parent() once the dangling pointer is left
+in the cache because we compare parent pointers in
+clk_fetch_parent_index() instead of checking for a matching clk name or
+clk_hw pointer.
+
+Before commit ede77858473a ("clk: Remove global clk traversal on fetch
+parent index"), we would check clk_hw pointers, which have a higher
+chance of being the same between registration and unregistration, but
+they can still be allocated and freed by the clk provider. In fact,
+this has been a long-standing problem since commit da0f0b2c3ad2 ("clk:
+Correct lookup logic in clk_fetch_parent_index()") where we stopped
+trying to compare clk names and skipped over entries in the cache that
+weren't NULL.
+
+There are good (performance) reasons not to do the global tree lookup
+in cases where the cache holds dangling pointers to parents that have
+been unregistered. Let's take the performance hit on the uncommon
+registration path instead. Loop through all the clk_core::parents
+arrays when a clk is unregistered and set the entry to NULL when the
+parent cache entry and the clk being unregistered are the same pointer.
+This will fix this problem and avoid the overhead for the "normal"
+case.
+
+Based on a patch by Bjorn Andersson.
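+
+As an illustration only (a stand-alone user-space model with made-up
+names, not the kernel implementation), the pointer comparison that goes
+stale and the eviction walk amount to:
+
+  #include <assert.h>
+
+  struct model_clk {
+      struct model_clk *parents[4];   /* cached candidate parents */
+      int num_parents;
+      struct model_clk *children[4];  /* NULL-terminated */
+  };
+
+  /* Pointer compare, as clk_fetch_parent_index() does */
+  static int fetch_parent_index(struct model_clk *c, struct model_clk *p)
+  {
+      for (int i = 0; i < c->num_parents; i++)
+          if (c->parents[i] == p)
+              return i;
+      return -1;
+  }
+
+  /* NULL out every cached reference to @dead in the subtree */
+  static void evict_parent_cache(struct model_clk *root,
+                                 struct model_clk *dead)
+  {
+      for (int i = 0; i < root->num_parents; i++)
+          if (root->parents[i] == dead)
+              root->parents[i] = NULL;
+      for (int i = 0; root->children[i]; i++)
+          evict_parent_cache(root->children[i], dead);
+  }
+
+  int main(void)
+  {
+      struct model_clk parent = { .num_parents = 0, .children = { 0 } };
+      struct model_clk child = { .parents = { &parent },
+                                 .num_parents = 1, .children = { 0 } };
+      struct model_clk root = { .num_parents = 0,
+                                .children = { &child, 0 } };
+
+      assert(fetch_parent_index(&child, &parent) == 0);
+      evict_parent_cache(&root, &parent);  /* "unregister" parent */
+      assert(fetch_parent_index(&child, &parent) == -1);
+      return 0;
+  }
+
+A NULL cache slot is a harmless miss; a dangling slot can never match a
+re-registered parent again, which is the bug being fixed.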
+
+Fixes: da0f0b2c3ad2 ("clk: Correct lookup logic in clk_fetch_parent_index()")
+Reviewed-by: Bjorn Andersson
+Tested-by: Sai Prakash Ranjan
+Signed-off-by: Stephen Boyd
+Link: https://lkml.kernel.org/r/20190828181959.204401-1-sboyd@kernel.org
+Tested-by: Naresh Kamboju
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/clk/clk.c | 52 +++++++++++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 41 insertions(+), 11 deletions(-)
+
+--- a/drivers/clk/clk.c
++++ b/drivers/clk/clk.c
+@@ -40,6 +40,17 @@ static HLIST_HEAD(clk_root_list);
+ static HLIST_HEAD(clk_orphan_list);
+ static LIST_HEAD(clk_notifier_list);
+ 
++static struct hlist_head *all_lists[] = {
++	&clk_root_list,
++	&clk_orphan_list,
++	NULL,
++};
++
++static struct hlist_head *orphan_list[] = {
++	&clk_orphan_list,
++	NULL,
++};
++
+ /***    private data structures    ***/
+ 
+ struct clk_core {
+@@ -2618,17 +2629,6 @@ static int inited = 0;
+ static DEFINE_MUTEX(clk_debug_lock);
+ static HLIST_HEAD(clk_debug_list);
+ 
+-static struct hlist_head *all_lists[] = {
+-	&clk_root_list,
+-	&clk_orphan_list,
+-	NULL,
+-};
+-
+-static struct hlist_head *orphan_list[] = {
+-	&clk_orphan_list,
+-	NULL,
+-};
+-
+ static void clk_summary_show_one(struct seq_file *s, struct clk_core *c,
+ 				 int level)
+ {
+@@ -3328,6 +3328,34 @@ static const struct clk_ops clk_nodrv_op
+ 	.set_parent	= clk_nodrv_set_parent,
+ };
+ 
++static void clk_core_evict_parent_cache_subtree(struct clk_core *root,
++						struct clk_core *target)
++{
++	int i;
++	struct clk_core *child;
++
++	for (i = 0; i < root->num_parents; i++)
++		if (root->parents[i] == target)
++			root->parents[i] = NULL;
++
++	hlist_for_each_entry(child, &root->children, child_node)
++		clk_core_evict_parent_cache_subtree(child, target);
++}
++
++/* Remove this clk from all parent caches */
++static void clk_core_evict_parent_cache(struct clk_core *core)
++{
++	struct hlist_head **lists;
++	struct clk_core *root;
++
++	lockdep_assert_held(&prepare_lock);
++
++	for (lists = all_lists; *lists; lists++)
++		hlist_for_each_entry(root, *lists, child_node)
++			clk_core_evict_parent_cache_subtree(root, core);
++
++}
++
+ /**
+  * clk_unregister - unregister a currently registered clock
+  * @clk: clock to unregister
+@@ -3366,6 +3394,8 @@ void clk_unregister(struct clk *clk)
+ 		clk_core_set_parent_nolock(child, NULL);
+ 	}
+ 
++	clk_core_evict_parent_cache(clk->core);
++
+ 	hlist_del_init(&clk->core->child_node);
+ 
+ 	if (clk->core->prepare_count)
diff --git a/queue-4.19/do_epoll_ctl-clean-the-failure-exits-up-a-bit.patch b/queue-4.19/do_epoll_ctl-clean-the-failure-exits-up-a-bit.patch
new file mode 100644
index 00000000000..576f2e67002
--- /dev/null
+++ b/queue-4.19/do_epoll_ctl-clean-the-failure-exits-up-a-bit.patch
@@ -0,0 +1,51 @@
+From 52c479697c9b73f628140dcdfcd39ea302d05482 Mon Sep 17 00:00:00 2001
+From: Al Viro
+Date: Sat, 22 Aug 2020 18:25:52 -0400
+Subject: do_epoll_ctl(): clean the failure exits up a bit
+
+From: Al Viro
+
+commit 52c479697c9b73f628140dcdfcd39ea302d05482 upstream.
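+
+The diff below folds the scattered clear_tfile_check_list() calls into
+the common error_tgt_fput exit. As a stand-alone sketch only (user
+space, hypothetical names), the resulting single-exit cleanup idiom
+looks like this:
+
+  #include <stdio.h>
+
+  static void clear_check_list(void)  /* stands in for clear_tfile_check_list() */
+  {
+      puts("check list cleared exactly once");
+  }
+
+  static int do_op(int is_epoll, int loop_detected)
+  {
+      int error = 0;
+
+      if (is_epoll && loop_detected) {
+          error = -1;           /* stands in for -ELOOP */
+          goto error_exit;      /* no per-site cleanup needed */
+      }
+      /* ... the actual insert/delete work would go here ... */
+  error_exit:
+      clear_check_list();       /* one exit path, reached on success too */
+      return error;
+  }
+
+  int main(void)
+  {
+      return do_op(1, 1) == -1 ? 0 : 1;
+  }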
+
+Signed-off-by: Al Viro
+Signed-off-by: Marc Zyngier
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/eventpoll.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -2094,10 +2094,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, in
+ 		mutex_lock(&epmutex);
+ 		if (is_file_epoll(tf.file)) {
+ 			error = -ELOOP;
+-			if (ep_loop_check(ep, tf.file) != 0) {
+-				clear_tfile_check_list();
++			if (ep_loop_check(ep, tf.file) != 0)
+ 				goto error_tgt_fput;
+-			}
+ 		} else {
+ 			get_file(tf.file);
+ 			list_add(&tf.file->f_tfile_llink,
+@@ -2126,8 +2124,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, in
+ 			error = ep_insert(ep, &epds, tf.file, fd, full_check);
+ 		} else
+ 			error = -EEXIST;
+-		if (full_check)
+-			clear_tfile_check_list();
+ 		break;
+ 	case EPOLL_CTL_DEL:
+ 		if (epi)
+@@ -2150,8 +2146,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, in
+ 	mutex_unlock(&ep->mtx);
+ 
+ error_tgt_fput:
+-	if (full_check)
++	if (full_check) {
++		clear_tfile_check_list();
+ 		mutex_unlock(&epmutex);
++	}
+ 
+ 	fdput(tf);
+ error_fput:
diff --git a/queue-4.19/epoll-keep-a-reference-on-files-added-to-the-check-list.patch b/queue-4.19/epoll-keep-a-reference-on-files-added-to-the-check-list.patch
new file mode 100644
index 00000000000..5b1068cdd95
--- /dev/null
+++ b/queue-4.19/epoll-keep-a-reference-on-files-added-to-the-check-list.patch
@@ -0,0 +1,66 @@
+From a9ed4a6560b8562b7e2e2bed9527e88001f7b682 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier
+Date: Wed, 19 Aug 2020 17:12:17 +0100
+Subject: epoll: Keep a reference on files added to the check list
+
+From: Marc Zyngier
+
+commit a9ed4a6560b8562b7e2e2bed9527e88001f7b682 upstream.
+
+When adding a new fd to an epoll, and this new fd is itself an
+epoll fd, we recursively scan the fds attached to it to detect
+cycles, and add non-epoll files to a "check list" that gets
+subsequently parsed.
+
+However, this check list isn't completely safe when deletions
+can happen concurrently. To sidestep the issue, make sure that
+a struct file placed on the check list sees its f_count increased,
+ensuring that a concurrent deletion won't result in the file
+disappearing from under our feet.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Marc Zyngier
+Signed-off-by: Al Viro
+Signed-off-by: Marc Zyngier
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/eventpoll.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -1890,9 +1890,11 @@ static int ep_loop_check_proc(void *priv
+ 			 * not already there, and calling reverse_path_check()
+ 			 * during ep_insert().
+ 			 */
+-			if (list_empty(&epi->ffd.file->f_tfile_llink))
++			if (list_empty(&epi->ffd.file->f_tfile_llink)) {
++				get_file(epi->ffd.file);
+ 				list_add(&epi->ffd.file->f_tfile_llink,
+ 					 &tfile_check_list);
++			}
+ 		}
+ 	}
+ 	mutex_unlock(&ep->mtx);
+@@ -1936,6 +1938,7 @@ static void clear_tfile_check_list(void)
+ 		file = list_first_entry(&tfile_check_list, struct file,
+ 					f_tfile_llink);
+ 		list_del_init(&file->f_tfile_llink);
++		fput(file);
+ 	}
+ 	INIT_LIST_HEAD(&tfile_check_list);
+ }
+@@ -2095,9 +2098,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, in
+ 				clear_tfile_check_list();
+ 				goto error_tgt_fput;
+ 			}
+-		} else
++		} else {
++			get_file(tf.file);
+ 			list_add(&tf.file->f_tfile_llink,
+ 				 &tfile_check_list);
++		}
+ 		mutex_lock_nested(&ep->mtx, 0);
+ 		if (is_file_epoll(tf.file)) {
+ 			tep = tf.file->private_data;
diff --git a/queue-4.19/mm-hugetlb-fix-calculation-of-adjust_range_if_pmd_sharing_possible.patch b/queue-4.19/mm-hugetlb-fix-calculation-of-adjust_range_if_pmd_sharing_possible.patch
new file mode 100644
index 00000000000..3bd02d1a4af
--- /dev/null
+++ b/queue-4.19/mm-hugetlb-fix-calculation-of-adjust_range_if_pmd_sharing_possible.patch
@@ -0,0 +1,91 @@
+From 75802ca66354a39ab8e35822747cd08b3384a99a Mon Sep 17 00:00:00 2001
+From: Peter Xu
+Date: Thu, 6 Aug 2020 23:26:11 -0700
+Subject: mm/hugetlb: fix calculation of adjust_range_if_pmd_sharing_possible
+
+From: Peter Xu
+
+commit 75802ca66354a39ab8e35822747cd08b3384a99a upstream.
+
+This was found by code inspection only.
+
+Firstly, the worst case scenario should assume the whole range was covered
+by pmd sharing. The old algorithm might not work as expected for ranges
+like (1g-2m, 1g+2m): it only adjusts the range to (0, 1g+2m), while the
+expected result is (0, 2g).
+
+While at it, remove the loop since it should not be required. With that,
+the new code should be faster too when the invalidating range is huge.
+
+Mike said:
+
+: With range (1g-2m, 1g+2m) within a vma (0, 2g) the existing code will only
+: adjust to (0, 1g+2m) which is incorrect.
+:
+: We should cc stable. The original reason for adjusting the range was to
+: prevent data corruption (getting wrong page). Since the range is not
+: always adjusted correctly, the potential for corruption still exists.
+:
+: However, I am fairly confident that adjust_range_if_pmd_sharing_possible
+: is only going to be called in two cases:
+:
+: 1) for a single page
+: 2) for range == entire vma
+:
+: In those cases, the current code should produce the correct results.
+:
+: To be safe, let's just cc stable.
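+
+The new arithmetic is easy to check in isolation. A user-space sketch
+(PUD_SIZE taken as 1 GiB, the x86-64 value, and the kernel helpers
+redefined locally for power-of-two alignments):
+
+  #include <assert.h>
+
+  #define PUD_SIZE          (1UL << 30)                    /* 1 GiB */
+  #define ALIGN_DOWN(x, a)  ((x) & ~((a) - 1))
+  #define ALIGN(x, a)       (((x) + (a) - 1) & ~((a) - 1))
+
+  int main(void)
+  {
+      unsigned long vm_start = 0, vm_end = 2UL << 30;  /* vma (0, 2g)  */
+      unsigned long start = (1UL << 30) - (2UL << 20); /* 1g - 2m      */
+      unsigned long end   = (1UL << 30) + (2UL << 20); /* 1g + 2m      */
+
+      /* Worst case: extend to PUD-aligned boundaries ... */
+      unsigned long a_start = ALIGN_DOWN(start, PUD_SIZE); /* 0  */
+      unsigned long a_end   = ALIGN(end, PUD_SIZE);        /* 2g */
+
+      /* ... then intersect with the vma, as the new code does */
+      start = vm_start > a_start ? vm_start : a_start;
+      end   = vm_end   < a_end   ? vm_end   : a_end;
+
+      /* full (0, 2g), not the old (0, 1g+2m) */
+      assert(start == 0 && end == (2UL << 30));
+      return 0;
+  }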
+
+Fixes: 017b1660df89 ("mm: migration: fix migration of huge PMD shared pages")
+Signed-off-by: Peter Xu
+Signed-off-by: Andrew Morton
+Reviewed-by: Mike Kravetz
+Cc: Andrea Arcangeli
+Cc: Matthew Wilcox
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20200730201636.74778-1-peterx@redhat.com
+Signed-off-by: Linus Torvalds
+Signed-off-by: Mike Kravetz
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/hugetlb.c | 24 ++++++++++--------------
+ 1 file changed, 10 insertions(+), 14 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -4650,25 +4650,21 @@ static bool vma_shareable(struct vm_area
+ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+ 				unsigned long *start, unsigned long *end)
+ {
+-	unsigned long check_addr = *start;
++	unsigned long a_start, a_end;
+ 
+ 	if (!(vma->vm_flags & VM_MAYSHARE))
+ 		return;
+ 
+-	for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
+-		unsigned long a_start = check_addr & PUD_MASK;
+-		unsigned long a_end = a_start + PUD_SIZE;
++	/* Extend the range to be PUD aligned for a worst case scenario */
++	a_start = ALIGN_DOWN(*start, PUD_SIZE);
++	a_end = ALIGN(*end, PUD_SIZE);
+ 
+-		/*
+-		 * If sharing is possible, adjust start/end if necessary.
+-		 */
+-		if (range_in_vma(vma, a_start, a_end)) {
+-			if (a_start < *start)
+-				*start = a_start;
+-			if (a_end > *end)
+-				*end = a_end;
+-		}
+-	}
++	/*
++	 * Intersect the range with the vma range, since pmd sharing won't be
++	 * across vma after all
++	 */
++	*start = max(vma->vm_start, a_start);
++	*end = min(vma->vm_end, a_end);
+ }
+ 
+ /*
diff --git a/queue-4.19/series b/queue-4.19/series
index a25b3caf12e..727de10d2cc 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -64,3 +64,8 @@ hv_netvsc-fix-the-queue_mapping-in-netvsc_vf_xmit.patch
 net-dsa-b53-check-for-timeout.patch
 powerpc-pseries-do-not-initiate-shutdown-when-system-is-running-on-ups.patch
 efi-add-missed-destroy_workqueue-when-efisubsys_init-fails.patch
+epoll-keep-a-reference-on-files-added-to-the-check-list.patch
+do_epoll_ctl-clean-the-failure-exits-up-a-bit.patch
+mm-hugetlb-fix-calculation-of-adjust_range_if_pmd_sharing_possible.patch
+xen-don-t-reschedule-in-preemption-off-sections.patch
+clk-evict-unregistered-clks-from-parent-caches.patch
diff --git a/queue-4.19/xen-don-t-reschedule-in-preemption-off-sections.patch b/queue-4.19/xen-don-t-reschedule-in-preemption-off-sections.patch
new file mode 100644
index 00000000000..892374fae02
--- /dev/null
+++ b/queue-4.19/xen-don-t-reschedule-in-preemption-off-sections.patch
@@ -0,0 +1,96 @@
+From jgross@suse.com Mon Aug 24 10:12:00 2020
+From: Juergen Gross
+Date: Thu, 20 Aug 2020 08:59:08 +0200
+Subject: xen: don't reschedule in preemption off sections
+To: stable@vger.kernel.org
+Cc: xen-devel@lists.xenproject.org, Juergen Gross, Boris Ostrovsky, Stefano Stabellini, Sarah Newman, Chris Brannon
+Message-ID: <20200820065908.20592-1-jgross@suse.com>
+
+From: Juergen Gross
+
+To support long-running hypercalls, xen_maybe_preempt_hcall() calls
+cond_resched() in case a hypercall marked as preemptible has been
+interrupted.
+
+Normally this is no problem, as only hypercalls done via some ioctl()s
+are marked as preemptible. In rare cases, however, an interrupt can
+occur during such a preemptible hypercall and softirq processing can
+then be started from irq_exit(); a further hypercall issued by the
+softirq handler will be regarded as preemptible, too.
+This might lead to rescheduling in spite of the softirq handler
+potentially having called preempt_disable(), leading to splats like:
+
+BUG: sleeping function called from invalid context at drivers/xen/preempt.c:37
+in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 20775, name: xl
+INFO: lockdep is turned off.
+CPU: 1 PID: 20775 Comm: xl Tainted: G D W 5.4.46-1_prgmr_debug.el7.x86_64 #1
+Call Trace:
+
+dump_stack+0x8f/0xd0
+___might_sleep.cold.76+0xb2/0x103
+xen_maybe_preempt_hcall+0x48/0x70
+xen_do_hypervisor_callback+0x37/0x40
+RIP: e030:xen_hypercall_xen_version+0xa/0x20
+Code: ...
+RSP: e02b:ffffc900400dcc30 EFLAGS: 00000246
+RAX: 000000000004000d RBX: 0000000000000200 RCX: ffffffff8100122a
+RDX: ffff88812e788000 RSI: 0000000000000000 RDI: 0000000000000000
+RBP: ffffffff83ee3ad0 R08: 0000000000000001 R09: 0000000000000001
+R10: 0000000000000000 R11: 0000000000000246 R12: ffff8881824aa0b0
+R13: 0000000865496000 R14: 0000000865496000 R15: ffff88815d040000
+? xen_hypercall_xen_version+0xa/0x20
+? xen_force_evtchn_callback+0x9/0x10
+? check_events+0x12/0x20
+? xen_restore_fl_direct+0x1f/0x20
+? _raw_spin_unlock_irqrestore+0x53/0x60
+? debug_dma_sync_single_for_cpu+0x91/0xc0
+? _raw_spin_unlock_irqrestore+0x53/0x60
+? xen_swiotlb_sync_single_for_cpu+0x3d/0x140
+? mlx4_en_process_rx_cq+0x6b6/0x1110 [mlx4_en]
+? mlx4_en_poll_rx_cq+0x64/0x100 [mlx4_en]
+? net_rx_action+0x151/0x4a0
+? __do_softirq+0xed/0x55b
+? irq_exit+0xea/0x100
+? xen_evtchn_do_upcall+0x2c/0x40
+? xen_do_hypervisor_callback+0x29/0x40
+
+? xen_hypercall_domctl+0xa/0x20
+? xen_hypercall_domctl+0x8/0x20
+? privcmd_ioctl+0x221/0x990 [xen_privcmd]
+? do_vfs_ioctl+0xa5/0x6f0
+? ksys_ioctl+0x60/0x90
+? trace_hardirqs_off_thunk+0x1a/0x20
+? __x64_sys_ioctl+0x16/0x20
+? do_syscall_64+0x62/0x250
+? entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+Fix that by testing preempt_count() before calling cond_resched().
+
+In kernel 5.8 this can't happen any more due to the entry code rework
+(more than 100 patches, so not a candidate for backporting).
+
+The issue was introduced in kernel 4.3, so this patch should go into
+all stable kernels in [4.3 ... 5.7].
+
+Reported-by: Sarah Newman
+Fixes: 0fa2f5cb2b0ecd8 ("sched/preempt, xen: Use need_resched() instead of should_resched()")
+Cc: Sarah Newman
+Cc: stable@vger.kernel.org
+Signed-off-by: Juergen Gross
+Tested-by: Chris Brannon
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/xen/preempt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/xen/preempt.c
++++ b/drivers/xen/preempt.c
+@@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(xen_in_preemptible_hca
+ asmlinkage __visible void xen_maybe_preempt_hcall(void)
+ {
+ 	if (unlikely(__this_cpu_read(xen_in_preemptible_hcall)
+-		     && need_resched())) {
++		     && need_resched() && !preempt_count())) {
+ 		/*
+ 		 * Clear flag as we may be rescheduled on a different
+ 		 * cpu.