From: Greg Kroah-Hartman
Date: Mon, 24 Aug 2020 08:22:59 +0000 (+0200)
Subject: 4.19-stable patches
X-Git-Tag: v4.4.234~17
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=490c0a354f8feabbf042cc8f16dd8cf8f7d3ab11;p=thirdparty%2Fkernel%2Fstable-queue.git

4.19-stable patches

added patches:
	clk-evict-unregistered-clks-from-parent-caches.patch
	do_epoll_ctl-clean-the-failure-exits-up-a-bit.patch
	epoll-keep-a-reference-on-files-added-to-the-check-list.patch
	mm-hugetlb-fix-calculation-of-adjust_range_if_pmd_sharing_possible.patch
	xen-don-t-reschedule-in-preemption-off-sections.patch
---

diff --git a/queue-4.19/clk-evict-unregistered-clks-from-parent-caches.patch b/queue-4.19/clk-evict-unregistered-clks-from-parent-caches.patch
new file mode 100644
index 00000000000..84f0ac53a10
--- /dev/null
+++ b/queue-4.19/clk-evict-unregistered-clks-from-parent-caches.patch
@@ -0,0 +1,129 @@
+From bdcf1dc253248542537a742ae1e7ccafdd03f2d3 Mon Sep 17 00:00:00 2001
+From: Stephen Boyd
+Date: Wed, 28 Aug 2019 11:19:59 -0700
+Subject: clk: Evict unregistered clks from parent caches
+
+From: Stephen Boyd
+
+commit bdcf1dc253248542537a742ae1e7ccafdd03f2d3 upstream.
+
+We leave a dangling pointer in each clk_core::parents array that has an
+unregistered clk as a potential parent when that clk_core pointer is
+freed by clk{_hw}_unregister(). It is impossible for the true parent of
+a clk to be set with clk_set_parent() once the dangling pointer is left
+in the cache because we compare parent pointers in
+clk_fetch_parent_index() instead of checking for a matching clk name or
+clk_hw pointer.
+
+Before commit ede77858473a ("clk: Remove global clk traversal on fetch
+parent index"), we would check clk_hw pointers, which have a higher
+chance of being the same between registration and unregistration, but
+they can still be allocated and freed by the clk provider. In fact,
+this has been a long-standing problem since commit da0f0b2c3ad2 ("clk:
+Correct lookup logic in clk_fetch_parent_index()") where we stopped
+trying to compare clk names and skipped over entries in the cache that
+weren't NULL.
+
+There are good (performance) reasons not to do the global tree lookup
+in cases where the cache holds dangling pointers to parents that have
+been unregistered. Let's take the performance hit on the uncommon
+registration path instead. Loop through all the clk_core::parents
+arrays when a clk is unregistered and set the entry to NULL when the
+parent cache entry and the clk being unregistered are the same pointer.
+This will fix this problem and avoid the overhead for the "normal"
+case.
+
+Based on a patch by Bjorn Andersson.
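+
+As an illustration only (a stand-alone user-space model with made-up
+names, not the kernel implementation), the pointer comparison that goes
+stale and the eviction walk amount to:
+
+  #include <assert.h>
+
+  struct model_clk {
+      struct model_clk *parents[4];   /* cached candidate parents */
+      int num_parents;
+      struct model_clk *children[4];  /* NULL-terminated */
+  };
+
+  /* Pointer compare, as clk_fetch_parent_index() does */
+  static int fetch_parent_index(struct model_clk *c, struct model_clk *p)
+  {
+      for (int i = 0; i < c->num_parents; i++)
+          if (c->parents[i] == p)
+              return i;
+      return -1;
+  }
+
+  /* NULL out every cached reference to @dead in the subtree */
+  static void evict_parent_cache(struct model_clk *root,
+                                 struct model_clk *dead)
+  {
+      for (int i = 0; i < root->num_parents; i++)
+          if (root->parents[i] == dead)
+              root->parents[i] = NULL;
+      for (int i = 0; root->children[i]; i++)
+          evict_parent_cache(root->children[i], dead);
+  }
+
+  int main(void)
+  {
+      struct model_clk parent = { .num_parents = 0, .children = { 0 } };
+      struct model_clk child = { .parents = { &parent },
+                                 .num_parents = 1, .children = { 0 } };
+      struct model_clk root = { .num_parents = 0,
+                                .children = { &child, 0 } };
+
+      assert(fetch_parent_index(&child, &parent) == 0);
+      evict_parent_cache(&root, &parent);  /* "unregister" parent */
+      assert(fetch_parent_index(&child, &parent) == -1);
+      return 0;
+  }
+
+A NULL cache slot is a harmless miss; a dangling slot can never match a
+re-registered parent again, which is the bug being fixed.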
+
+Fixes: da0f0b2c3ad2 ("clk: Correct lookup logic in clk_fetch_parent_index()")
+Reviewed-by: Bjorn Andersson
+Tested-by: Sai Prakash Ranjan
+Signed-off-by: Stephen Boyd
+Link: https://lkml.kernel.org/r/20190828181959.204401-1-sboyd@kernel.org
+Tested-by: Naresh Kamboju
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/clk/clk.c | 52 +++++++++++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 41 insertions(+), 11 deletions(-)
+
+--- a/drivers/clk/clk.c
++++ b/drivers/clk/clk.c
+@@ -40,6 +40,17 @@ static HLIST_HEAD(clk_root_list);
+ static HLIST_HEAD(clk_orphan_list);
+ static LIST_HEAD(clk_notifier_list);
+ 
++static struct hlist_head *all_lists[] = {
++	&clk_root_list,
++	&clk_orphan_list,
++	NULL,
++};
++
++static struct hlist_head *orphan_list[] = {
++	&clk_orphan_list,
++	NULL,
++};
++
+ /***    private data structures    ***/
+ 
+ struct clk_core {
+@@ -2618,17 +2629,6 @@ static int inited = 0;
+ static DEFINE_MUTEX(clk_debug_lock);
+ static HLIST_HEAD(clk_debug_list);
+ 
+-static struct hlist_head *all_lists[] = {
+-	&clk_root_list,
+-	&clk_orphan_list,
+-	NULL,
+-};
+-
+-static struct hlist_head *orphan_list[] = {
+-	&clk_orphan_list,
+-	NULL,
+-};
+-
+ static void clk_summary_show_one(struct seq_file *s, struct clk_core *c,
+ 				 int level)
+ {
+@@ -3328,6 +3328,34 @@ static const struct clk_ops clk_nodrv_op
+ 	.set_parent	= clk_nodrv_set_parent,
+ };
+ 
++static void clk_core_evict_parent_cache_subtree(struct clk_core *root,
++						struct clk_core *target)
++{
++	int i;
++	struct clk_core *child;
++
++	for (i = 0; i < root->num_parents; i++)
++		if (root->parents[i] == target)
++			root->parents[i] = NULL;
++
++	hlist_for_each_entry(child, &root->children, child_node)
++		clk_core_evict_parent_cache_subtree(child, target);
++}
++
++/* Remove this clk from all parent caches */
++static void clk_core_evict_parent_cache(struct clk_core *core)
++{
++	struct hlist_head **lists;
++	struct clk_core *root;
++
++	lockdep_assert_held(&prepare_lock);
++
++	for (lists = all_lists; *lists; lists++)
++		hlist_for_each_entry(root, *lists, child_node)
++			clk_core_evict_parent_cache_subtree(root, core);
++
++}
++
+ /**
+  * clk_unregister - unregister a currently registered clock
+  * @clk: clock to unregister
+@@ -3366,6 +3394,8 @@ void clk_unregister(struct clk *clk)
+ 		clk_core_set_parent_nolock(child, NULL);
+ 	}
+ 
++	clk_core_evict_parent_cache(clk->core);
++
+ 	hlist_del_init(&clk->core->child_node);
+ 
+ 	if (clk->core->prepare_count)
diff --git a/queue-4.19/do_epoll_ctl-clean-the-failure-exits-up-a-bit.patch b/queue-4.19/do_epoll_ctl-clean-the-failure-exits-up-a-bit.patch
new file mode 100644
index 00000000000..576f2e67002
--- /dev/null
+++ b/queue-4.19/do_epoll_ctl-clean-the-failure-exits-up-a-bit.patch
@@ -0,0 +1,51 @@
+From 52c479697c9b73f628140dcdfcd39ea302d05482 Mon Sep 17 00:00:00 2001
+From: Al Viro
+Date: Sat, 22 Aug 2020 18:25:52 -0400
+Subject: do_epoll_ctl(): clean the failure exits up a bit
+
+From: Al Viro
+
+commit 52c479697c9b73f628140dcdfcd39ea302d05482 upstream.
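+
+The diff below folds the scattered clear_tfile_check_list() calls into
+the common error_tgt_fput exit. As a stand-alone sketch only (user
+space, hypothetical names), the resulting single-exit cleanup idiom
+looks like this:
+
+  #include <stdio.h>
+
+  static void clear_check_list(void)  /* stands in for clear_tfile_check_list() */
+  {
+      puts("check list cleared exactly once");
+  }
+
+  static int do_op(int is_epoll, int loop_detected)
+  {
+      int error = 0;
+
+      if (is_epoll && loop_detected) {
+          error = -1;           /* stands in for -ELOOP */
+          goto error_exit;      /* no per-site cleanup needed */
+      }
+      /* ... the actual insert/delete work would go here ... */
+  error_exit:
+      clear_check_list();       /* one exit path, reached on success too */
+      return error;
+  }
+
+  int main(void)
+  {
+      return do_op(1, 1) == -1 ? 0 : 1;
+  }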
+
+Signed-off-by: Al Viro
+Signed-off-by: Marc Zyngier
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/eventpoll.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -2094,10 +2094,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, in
+ 		mutex_lock(&epmutex);
+ 		if (is_file_epoll(tf.file)) {
+ 			error = -ELOOP;
+-			if (ep_loop_check(ep, tf.file) != 0) {
+-				clear_tfile_check_list();
++			if (ep_loop_check(ep, tf.file) != 0)
+ 				goto error_tgt_fput;
+-			}
+ 		} else {
+ 			get_file(tf.file);
+ 			list_add(&tf.file->f_tfile_llink,
+@@ -2126,8 +2124,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, in
+ 			error = ep_insert(ep, &epds, tf.file, fd, full_check);
+ 		} else
+ 			error = -EEXIST;
+-		if (full_check)
+-			clear_tfile_check_list();
+ 		break;
+ 	case EPOLL_CTL_DEL:
+ 		if (epi)
+@@ -2150,8 +2146,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, in
+ 	mutex_unlock(&ep->mtx);
+ 
+ error_tgt_fput:
+-	if (full_check)
++	if (full_check) {
++		clear_tfile_check_list();
+ 		mutex_unlock(&epmutex);
++	}
+ 
+ 	fdput(tf);
+ error_fput:
diff --git a/queue-4.19/epoll-keep-a-reference-on-files-added-to-the-check-list.patch b/queue-4.19/epoll-keep-a-reference-on-files-added-to-the-check-list.patch
new file mode 100644
index 00000000000..5b1068cdd95
--- /dev/null
+++ b/queue-4.19/epoll-keep-a-reference-on-files-added-to-the-check-list.patch
@@ -0,0 +1,66 @@
+From a9ed4a6560b8562b7e2e2bed9527e88001f7b682 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier
+Date: Wed, 19 Aug 2020 17:12:17 +0100
+Subject: epoll: Keep a reference on files added to the check list
+
+From: Marc Zyngier
+
+commit a9ed4a6560b8562b7e2e2bed9527e88001f7b682 upstream.
+
+When adding a new fd to an epoll, and this new fd is itself an
+epoll fd, we recursively scan the fds attached to it to detect
+cycles, and add non-epoll files to a "check list" that gets
+subsequently parsed.
+
+However, this check list isn't completely safe when deletions
+can happen concurrently. To sidestep the issue, make sure that
+a struct file placed on the check list sees its f_count increased,
+ensuring that a concurrent deletion won't result in the file
+disappearing from under our feet.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Marc Zyngier
+Signed-off-by: Al Viro
+Signed-off-by: Marc Zyngier
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/eventpoll.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -1890,9 +1890,11 @@ static int ep_loop_check_proc(void *priv
+ 			 * not already there, and calling reverse_path_check()
+ 			 * during ep_insert().
+ 			 */
+-			if (list_empty(&epi->ffd.file->f_tfile_llink))
++			if (list_empty(&epi->ffd.file->f_tfile_llink)) {
++				get_file(epi->ffd.file);
+ 				list_add(&epi->ffd.file->f_tfile_llink,
+ 					 &tfile_check_list);
++			}
+ 		}
+ 	}
+ 	mutex_unlock(&ep->mtx);
+@@ -1936,6 +1938,7 @@ static void clear_tfile_check_list(void)
+ 		file = list_first_entry(&tfile_check_list, struct file,
+ 					f_tfile_llink);
+ 		list_del_init(&file->f_tfile_llink);
++		fput(file);
+ 	}
+ 	INIT_LIST_HEAD(&tfile_check_list);
+ }
+@@ -2095,9 +2098,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, in
+ 				clear_tfile_check_list();
+ 				goto error_tgt_fput;
+ 			}
+-		} else
++		} else {
++			get_file(tf.file);
+ 			list_add(&tf.file->f_tfile_llink,
+ 				 &tfile_check_list);
++		}
+ 		mutex_lock_nested(&ep->mtx, 0);
+ 		if (is_file_epoll(tf.file)) {
+ 			tep = tf.file->private_data;
diff --git a/queue-4.19/mm-hugetlb-fix-calculation-of-adjust_range_if_pmd_sharing_possible.patch b/queue-4.19/mm-hugetlb-fix-calculation-of-adjust_range_if_pmd_sharing_possible.patch
new file mode 100644
index 00000000000..3bd02d1a4af
--- /dev/null
+++ b/queue-4.19/mm-hugetlb-fix-calculation-of-adjust_range_if_pmd_sharing_possible.patch
@@ -0,0 +1,91 @@
+From 75802ca66354a39ab8e35822747cd08b3384a99a Mon Sep 17 00:00:00 2001
+From: Peter Xu
+Date: Thu, 6 Aug 2020 23:26:11 -0700
+Subject: mm/hugetlb: fix calculation of adjust_range_if_pmd_sharing_possible
+
+From: Peter Xu
+
+commit 75802ca66354a39ab8e35822747cd08b3384a99a upstream.
+
+This was found by code inspection only.
+
+Firstly, the worst case scenario should assume the whole range was covered
+by pmd sharing. The old algorithm might not work as expected for ranges
+like (1g-2m, 1g+2m): it only adjusts the range to (0, 1g+2m), while the
+expected result is (0, 2g).
+
+While at it, remove the loop since it should not be required. With that,
+the new code should be faster too when the invalidating range is huge.
+
+Mike said:
+
+: With range (1g-2m, 1g+2m) within a vma (0, 2g) the existing code will only
+: adjust to (0, 1g+2m) which is incorrect.
+:
+: We should cc stable. The original reason for adjusting the range was to
+: prevent data corruption (getting wrong page). Since the range is not
+: always adjusted correctly, the potential for corruption still exists.
+:
+: However, I am fairly confident that adjust_range_if_pmd_sharing_possible
+: is only going to be called in two cases:
+:
+: 1) for a single page
+: 2) for range == entire vma
+:
+: In those cases, the current code should produce the correct results.
+:
+: To be safe, let's just cc stable.
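+
+The new arithmetic is easy to check in isolation. A user-space sketch
+(PUD_SIZE taken as 1 GiB, the x86-64 value, and the kernel helpers
+redefined locally for power-of-two alignments):
+
+  #include <assert.h>
+
+  #define PUD_SIZE          (1UL << 30)                    /* 1 GiB */
+  #define ALIGN_DOWN(x, a)  ((x) & ~((a) - 1))
+  #define ALIGN(x, a)       (((x) + (a) - 1) & ~((a) - 1))
+
+  int main(void)
+  {
+      unsigned long vm_start = 0, vm_end = 2UL << 30;  /* vma (0, 2g)  */
+      unsigned long start = (1UL << 30) - (2UL << 20); /* 1g - 2m      */
+      unsigned long end   = (1UL << 30) + (2UL << 20); /* 1g + 2m      */
+
+      /* Worst case: extend to PUD-aligned boundaries ... */
+      unsigned long a_start = ALIGN_DOWN(start, PUD_SIZE); /* 0  */
+      unsigned long a_end   = ALIGN(end, PUD_SIZE);        /* 2g */
+
+      /* ... then intersect with the vma, as the new code does */
+      start = vm_start > a_start ? vm_start : a_start;
+      end   = vm_end   < a_end   ? vm_end   : a_end;
+
+      /* full (0, 2g), not the old (0, 1g+2m) */
+      assert(start == 0 && end == (2UL << 30));
+      return 0;
+  }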
+
+Fixes: 017b1660df89 ("mm: migration: fix migration of huge PMD shared pages")
+Signed-off-by: Peter Xu
+Signed-off-by: Andrew Morton
+Reviewed-by: Mike Kravetz
+Cc: Andrea Arcangeli
+Cc: Matthew Wilcox
+Cc: <stable@vger.kernel.org>
+Link: http://lkml.kernel.org/r/20200730201636.74778-1-peterx@redhat.com
+Signed-off-by: Linus Torvalds
+Signed-off-by: Mike Kravetz
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/hugetlb.c | 24 ++++++++++--------------
+ 1 file changed, 10 insertions(+), 14 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -4650,25 +4650,21 @@ static bool vma_shareable(struct vm_area
+ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+ 				unsigned long *start, unsigned long *end)
+ {
+-	unsigned long check_addr = *start;
++	unsigned long a_start, a_end;
+ 
+ 	if (!(vma->vm_flags & VM_MAYSHARE))
+ 		return;
+ 
+-	for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
+-		unsigned long a_start = check_addr & PUD_MASK;
+-		unsigned long a_end = a_start + PUD_SIZE;
++	/* Extend the range to be PUD aligned for a worst case scenario */
++	a_start = ALIGN_DOWN(*start, PUD_SIZE);
++	a_end = ALIGN(*end, PUD_SIZE);
+ 
+-		/*
+-		 * If sharing is possible, adjust start/end if necessary.
+-		 */
+-		if (range_in_vma(vma, a_start, a_end)) {
+-			if (a_start < *start)
+-				*start = a_start;
+-			if (a_end > *end)
+-				*end = a_end;
+-		}
+-	}
++	/*
++	 * Intersect the range with the vma range, since pmd sharing won't be
++	 * across vma after all
++	 */
++	*start = max(vma->vm_start, a_start);
++	*end = min(vma->vm_end, a_end);
+ }
+ 
+ /*
diff --git a/queue-4.19/series b/queue-4.19/series
index a25b3caf12e..727de10d2cc 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -64,3 +64,8 @@ hv_netvsc-fix-the-queue_mapping-in-netvsc_vf_xmit.patch
 net-dsa-b53-check-for-timeout.patch
 powerpc-pseries-do-not-initiate-shutdown-when-system-is-running-on-ups.patch
 efi-add-missed-destroy_workqueue-when-efisubsys_init-fails.patch
+epoll-keep-a-reference-on-files-added-to-the-check-list.patch
+do_epoll_ctl-clean-the-failure-exits-up-a-bit.patch
+mm-hugetlb-fix-calculation-of-adjust_range_if_pmd_sharing_possible.patch
+xen-don-t-reschedule-in-preemption-off-sections.patch
+clk-evict-unregistered-clks-from-parent-caches.patch
diff --git a/queue-4.19/xen-don-t-reschedule-in-preemption-off-sections.patch b/queue-4.19/xen-don-t-reschedule-in-preemption-off-sections.patch
new file mode 100644
index 00000000000..892374fae02
--- /dev/null
+++ b/queue-4.19/xen-don-t-reschedule-in-preemption-off-sections.patch
@@ -0,0 +1,96 @@
+From jgross@suse.com Mon Aug 24 10:12:00 2020
+From: Juergen Gross
+Date: Thu, 20 Aug 2020 08:59:08 +0200
+Subject: xen: don't reschedule in preemption off sections
+To: stable@vger.kernel.org
+Cc: xen-devel@lists.xenproject.org, Juergen Gross, Boris Ostrovsky, Stefano Stabellini, Sarah Newman, Chris Brannon
+Message-ID: <20200820065908.20592-1-jgross@suse.com>
+
+From: Juergen Gross
+
+To support long-running hypercalls, xen_maybe_preempt_hcall() calls
+cond_resched() in case a hypercall marked as preemptible has been
+interrupted.
+
+Normally this is no problem, as only hypercalls done via some ioctl()s
+are marked as preemptible. In rare cases, however, an interrupt can
+occur during such a preemptible hypercall and softirq processing can
+then be started from irq_exit(); a further hypercall issued by the
+softirq handler will be regarded as preemptible, too.
+This might lead to rescheduling in spite of the softirq handler
+potentially having called preempt_disable(), leading to splats like:
+
+BUG: sleeping function called from invalid context at drivers/xen/preempt.c:37
+in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 20775, name: xl
+INFO: lockdep is turned off.
+CPU: 1 PID: 20775 Comm: xl Tainted: G D W 5.4.46-1_prgmr_debug.el7.x86_64 #1
+Call Trace:
+
+dump_stack+0x8f/0xd0
+___might_sleep.cold.76+0xb2/0x103
+xen_maybe_preempt_hcall+0x48/0x70
+xen_do_hypervisor_callback+0x37/0x40
+RIP: e030:xen_hypercall_xen_version+0xa/0x20
+Code: ...
+RSP: e02b:ffffc900400dcc30 EFLAGS: 00000246
+RAX: 000000000004000d RBX: 0000000000000200 RCX: ffffffff8100122a
+RDX: ffff88812e788000 RSI: 0000000000000000 RDI: 0000000000000000
+RBP: ffffffff83ee3ad0 R08: 0000000000000001 R09: 0000000000000001
+R10: 0000000000000000 R11: 0000000000000246 R12: ffff8881824aa0b0
+R13: 0000000865496000 R14: 0000000865496000 R15: ffff88815d040000
+? xen_hypercall_xen_version+0xa/0x20
+? xen_force_evtchn_callback+0x9/0x10
+? check_events+0x12/0x20
+? xen_restore_fl_direct+0x1f/0x20
+? _raw_spin_unlock_irqrestore+0x53/0x60
+? debug_dma_sync_single_for_cpu+0x91/0xc0
+? _raw_spin_unlock_irqrestore+0x53/0x60
+? xen_swiotlb_sync_single_for_cpu+0x3d/0x140
+? mlx4_en_process_rx_cq+0x6b6/0x1110 [mlx4_en]
+? mlx4_en_poll_rx_cq+0x64/0x100 [mlx4_en]
+? net_rx_action+0x151/0x4a0
+? __do_softirq+0xed/0x55b
+? irq_exit+0xea/0x100
+? xen_evtchn_do_upcall+0x2c/0x40
+? xen_do_hypervisor_callback+0x29/0x40
+
+? xen_hypercall_domctl+0xa/0x20
+? xen_hypercall_domctl+0x8/0x20
+? privcmd_ioctl+0x221/0x990 [xen_privcmd]
+? do_vfs_ioctl+0xa5/0x6f0
+? ksys_ioctl+0x60/0x90
+? trace_hardirqs_off_thunk+0x1a/0x20
+? __x64_sys_ioctl+0x16/0x20
+? do_syscall_64+0x62/0x250
+? entry_SYSCALL_64_after_hwframe+0x49/0xbe
+
+Fix that by testing preempt_count() before calling cond_resched().
+
+In kernel 5.8 this can't happen any more due to the entry code rework
+(more than 100 patches, so not a candidate for backporting).
+
+The issue was introduced in kernel 4.3, so this patch should go into
+all stable kernels in [4.3 ... 5.7].
+
+Reported-by: Sarah Newman
+Fixes: 0fa2f5cb2b0ecd8 ("sched/preempt, xen: Use need_resched() instead of should_resched()")
+Cc: Sarah Newman
+Cc: stable@vger.kernel.org
+Signed-off-by: Juergen Gross
+Tested-by: Chris Brannon
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/xen/preempt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/xen/preempt.c
++++ b/drivers/xen/preempt.c
+@@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(xen_in_preemptible_hca
+ asmlinkage __visible void xen_maybe_preempt_hcall(void)
+ {
+ 	if (unlikely(__this_cpu_read(xen_in_preemptible_hcall)
+-		     && need_resched())) {
++		     && need_resched() && !preempt_count())) {
+ 		/*
+ 		 * Clear flag as we may be rescheduled on a different
+ 		 * cpu.